├── docs ├── common ├── index.md ├── CONTRIBUTING.md ├── databuilder ├── search ├── frontend ├── metadata ├── k8s_install.md ├── img │ ├── graph_model.png │ ├── neo4j-debug.png │ ├── search-page.png │ ├── column_details.png │ ├── data_preview.png │ ├── landing_page.png │ ├── search_preview.png │ ├── search-exact-match.png │ ├── tutorials │ │ ├── postgres.png │ │ ├── table-badge.png │ │ ├── column-badge.png │ │ ├── table-postgres.png │ │ ├── search-postgres.png │ │ ├── superset-add-db.png │ │ ├── superset-welcome.png │ │ ├── amundsen-preview1.png │ │ ├── amundsen-preview2.png │ │ └── superset-sqllab-verify.png │ ├── Amundsen_Architecture.png │ ├── issue_process_diagram.png │ ├── logos │ │ ├── amundsen_mark_circle.png │ │ ├── amundsen_mark_tan_bg.png │ │ ├── amundsen_mark_transparent_bg.png │ │ ├── amundsen_mark_blue.svg │ │ ├── amundsen_mark_orange.svg │ │ ├── amundsen_logo_on_light.svg │ │ └── amundsen_logo_on_blue.svg │ └── table_detail_page_with_badges.png ├── installation-aws-ecs │ ├── userData.sh │ ├── ecs-params.yml │ ├── aws-ecs-deployment.md │ └── docker-ecs-amundsen.yml ├── css │ └── app.css ├── tutorials │ ├── index-postgres.md │ ├── user-profiles.md │ ├── badges.md │ ├── data-preview-with-superset.md │ ├── how-to-track-user-metric.md │ └── how-to-search-effective.md ├── architecture.md ├── issue_labeling.md ├── faq.md ├── installation.md ├── roadmap.md ├── authentication │ └── oidc.md └── developer_guide.md ├── requirements.txt ├── .github ├── titleLint.yml ├── CODEOWNERS ├── workflows │ └── deploy_docs.yml ├── ISSUE_TEMPLATE │ ├── feature-request.md │ └── bug-report.md └── PULL_REQUEST_TEMPLATE.md ├── NOTICE ├── .gitignore ├── CODE_OF_CONDUCT.md ├── .dependabot └── config.yml ├── amundsen-kube-helm ├── templates │ ├── helm │ │ ├── requirements.yaml │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── pv-neo4j.yaml │ │ │ ├── pvc-neo4j.yaml │ │ │ ├── service-search.yaml │ │ │ ├── service-frontend.yaml │ │ │ ├── service-metadata.yaml │ │ │ ├── service-neo4j.yaml │ │ │ ├── secret-oidc-config.yaml │ │ │ ├── _helpers.tpl │ │ │ ├── configmap-neo4j.yaml │ │ │ ├── deployment-search.yaml │ │ │ ├── cronjob-neo4j-s3-backup.yaml │ │ │ ├── deployment-metadata.yaml │ │ │ ├── deployment-neo4j.yaml │ │ │ └── deployment-frontend.yaml │ │ └── values.yaml │ └── restore-backup │ │ ├── README.md │ │ └── restore-neo4j-pod.yaml └── README.md ├── .gitmodules ├── OWNERS.md ├── SECURITY.md ├── .all-contributorsrc ├── deploy_website.sh ├── docker-amundsen-atlas.yml ├── CONTRIBUTORS.md ├── docker-amundsen-local.yml ├── docker-amundsen.yml ├── CONTRIBUTING.md ├── MAINTAINING.md ├── mkdocs.yml ├── LICENSE ├── GOVERNANCE.md └── example └── docker └── neo4j └── conf └── neo4j.conf /docs/common: -------------------------------------------------------------------------------- 1 | ../amundsencommon -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/databuilder: -------------------------------------------------------------------------------- 1 | ../amundsendatabuilder -------------------------------------------------------------------------------- /docs/search: 
-------------------------------------------------------------------------------- 1 | ../amundsensearchlibrary -------------------------------------------------------------------------------- /docs/frontend: -------------------------------------------------------------------------------- 1 | ../amundsenfrontendlibrary -------------------------------------------------------------------------------- /docs/metadata: -------------------------------------------------------------------------------- 1 | ../amundsenmetadatalibrary -------------------------------------------------------------------------------- /docs/k8s_install.md: -------------------------------------------------------------------------------- 1 | ../amundsen-kube-helm/README.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs==1.1 2 | mkdocs-material==4.6.3 3 | mkdocs-redirects==1.0.0 4 | -------------------------------------------------------------------------------- /.github/titleLint.yml: -------------------------------------------------------------------------------- 1 | regex: (build|ci|docs|feat|fix|perf|refactor|style|test|chore|other): .* 2 | -------------------------------------------------------------------------------- /docs/img/graph_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/graph_model.png -------------------------------------------------------------------------------- /docs/img/neo4j-debug.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/neo4j-debug.png -------------------------------------------------------------------------------- /docs/img/search-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/search-page.png -------------------------------------------------------------------------------- /docs/img/column_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/column_details.png -------------------------------------------------------------------------------- /docs/img/data_preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/data_preview.png -------------------------------------------------------------------------------- /docs/img/landing_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/landing_page.png -------------------------------------------------------------------------------- /docs/img/search_preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/search_preview.png -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | amundsen 2 | Copyright 2018-2019 Lyft Inc. 3 | 4 | This product includes software developed at Lyft Inc. 
5 | -------------------------------------------------------------------------------- /docs/img/search-exact-match.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/search-exact-match.png -------------------------------------------------------------------------------- /docs/img/tutorials/postgres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/postgres.png -------------------------------------------------------------------------------- /docs/img/Amundsen_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/Amundsen_Architecture.png -------------------------------------------------------------------------------- /docs/img/issue_process_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/issue_process_diagram.png -------------------------------------------------------------------------------- /docs/img/tutorials/table-badge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/table-badge.png -------------------------------------------------------------------------------- /docs/img/tutorials/column-badge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/column-badge.png -------------------------------------------------------------------------------- /docs/img/tutorials/table-postgres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/table-postgres.png -------------------------------------------------------------------------------- /docs/installation-aws-ecs/userData.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # For ElasticSearch 4 | sysctl -w vm.max_map_count=262144 5 | 6 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_circle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/logos/amundsen_mark_circle.png -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_tan_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/logos/amundsen_mark_tan_bg.png -------------------------------------------------------------------------------- /docs/img/tutorials/search-postgres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/search-postgres.png -------------------------------------------------------------------------------- /docs/img/tutorials/superset-add-db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/superset-add-db.png 
-------------------------------------------------------------------------------- /docs/img/tutorials/superset-welcome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/superset-welcome.png -------------------------------------------------------------------------------- /docs/img/table_detail_page_with_badges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/table_detail_page_with_badges.png -------------------------------------------------------------------------------- /docs/img/tutorials/amundsen-preview1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/amundsen-preview1.png -------------------------------------------------------------------------------- /docs/img/tutorials/amundsen-preview2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/amundsen-preview2.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | example/backup/ 2 | example/docker/neo4j/plugins/ 3 | example/docker/es_data* 4 | .local/ 5 | 6 | .idea/ 7 | venv/ 8 | site/ 9 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_transparent_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/logos/amundsen_mark_transparent_bg.png -------------------------------------------------------------------------------- /docs/img/tutorials/superset-sqllab-verify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/superset-sqllab-verify.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This project is governed by [Linux Foundation's code of conduct](https://www.linuxfoundation.org/code-of-conduct/). 2 | All contributors and participants agree to abide by its terms. 
3 | -------------------------------------------------------------------------------- /.dependabot/config.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | update_configs: 3 | - package_manager: "submodules" 4 | directory: "/" 5 | update_schedule: "weekly" 6 | automerged_updates: 7 | - match: 8 | update_type: all 9 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/requirements.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | # - name: neo4j 3 | # version: 1.2.2 4 | # repository: https://kubernetes-charts.storage.googleapis.com/ 5 | - name: elasticsearch 6 | version: 1.32.0 7 | repository: https://kubernetes-charts.storage.googleapis.com/ 8 | condition: elasticsearch.enabled 9 | -------------------------------------------------------------------------------- /docs/css/app.css: -------------------------------------------------------------------------------- 1 | @import "theme.css"; 2 | 3 | /* Splits a long line descriptions in tables in to multiple lines */ 4 | .wy-table-responsive table td, .wy-table-responsive table th { 5 | white-space: normal !important; 6 | } 7 | 8 | /* align multi line csv table columns */ 9 | table.docutils div.line-block { 10 | margin-left: 0; 11 | } 12 | -------------------------------------------------------------------------------- /docs/installation-aws-ecs/ecs-params.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | task_definition: 3 | services: 4 | neo4j: 5 | cpu_shares: 100 6 | mem_limit: 3GB 7 | elasticsearch: 8 | cpu_shares: 100 9 | mem_limit: 3GB 10 | amundsensearch: 11 | cpu_shares: 100 12 | mem_limit: 500MB 13 | amundsenmetadata: 14 | cpu_shares: 100 15 | mem_limit: 500MB 16 | amundsenfrontend: 17 | cpu_shares: 100 18 | mem_limit: 500MB 19 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_blue.svg: -------------------------------------------------------------------------------- 1 | amundsen_mark_blue -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_orange.svg: -------------------------------------------------------------------------------- 1 | amundsen_mark_orange -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: Amundsen is a metadata driven application for improving the productivity of data analysts, data scientists and engineers when interacting with data. 
3 | name: amundsen 4 | version: 2.0.0 5 | icon: https://github.com/amundsen-io/amundsen/blob/master/docs/img/logos/amundsen_logo_on_light.svg 6 | home: https://github.com/amundsen-io/amundsen 7 | maintainers: 8 | - name: Amundsen TSC 9 | email: amundsen-tsc@lists.lfai.foundation 10 | sources: 11 | - https://github.com/amundsen-io/amundsen 12 | keywords: 13 | - metadata 14 | - data -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Codeowners file by GitHub 2 | # Reference: https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners 3 | # Each line is a file pattern followed by one or more owners. 4 | # Order is important; the last matching pattern takes the most 5 | # precedence. 6 | 7 | # These owners will be the default owners for everything in 8 | # the repo. Unless a later match takes precedence, 9 | # @amundsen-io/amundsen-committers will be requested for 10 | # review when someone opens a pull request. 11 | * @amundsen-io/amundsen-committers 12 | 13 | /amundsen-kube-helm/ @feng-tao @jornh @javamonkey79 14 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/pv-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.neo4j.enabled .Values.neo4j.persistence .Values.neo4j.persistence.efs }} 2 | apiVersion: v1 3 | kind: PersistentVolume 4 | metadata: 5 | name: neo4j-pv 6 | labels: 7 | app: {{ template "amundsen.fullname" . }} 8 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 9 | release: "{{ .Release.Name }}" 10 | heritage: "{{ .Release.Service }}" 11 | spec: 12 | accessModes: 13 | - ReadWriteMany 14 | capacity: 15 | storage: {{ default "3Gi" .Values.neo4j.persistence.size }} 16 | nfs: 17 | server: {{ .Values.neo4j.persistence.efs.dns }} 18 | path: "/" 19 | {{- end }} -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/restore-backup/README.md: -------------------------------------------------------------------------------- 1 | # Restoring neo4j Backups 2 | 3 | The Amundsen Helm chart includes a Kubernetes CronJob that backs up the neo4j database to S3. If you need to restore from one of these backups, use the one-off pod in this directory. 4 | 5 | ## Create the Pod 6 | 7 | You should have set up `kubectl` for the Kubernetes cluster you wish to restore into before running these commands. 8 | 9 | Update the YAML file with the S3 bucket for the backup you wish to restore and then apply the pod. 10 | 11 | ```shell 12 | kubectl apply -n <namespace> -f restore-neo4j-pod.yaml 13 | ``` 14 | 15 | Once the pod has been created, it will automatically run the restore. You can check the pod's logs to see whether it has succeeded or failed. 16 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/pvc-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.neo4j.enabled .Values.neo4j.persistence }} 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: neo4j-pvc 6 | namespace: {{ .Release.Namespace }} 7 | annotations: 8 | helm.sh/resource-policy: "keep" 9 | labels: 10 | app: {{ template "amundsen.fullname" .
}} 11 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 12 | release: "{{ .Release.Name }}" 13 | heritage: "{{ .Release.Service }}" 14 | spec: 15 | accessModes: 16 | - {{ default "ReadWriteOnce" .Values.neo4j.persistence.accessMode }} 17 | storageClassName: "{{ default "" .Values.neo4j.persistence.storageClass }}" 18 | resources: 19 | requests: 20 | storage: {{ default "3Gi" .Values.neo4j.persistence.size }} 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docs.yml: -------------------------------------------------------------------------------- 1 | name: Publish docs via GitHub Pages 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | build: 9 | name: Deploy docs 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout master 13 | uses: actions/checkout@v1 14 | - name: Checkout submodules using a PAT 15 | run: | 16 | git config --file .gitmodules --get-regexp url | while read url; do 17 | git config --file=.gitmodules $(echo "$url" | sed -E "s/git@github.com:|https:\/\/github.com\//https:\/\/${{ secrets.CI_PAT }}:${{ secrets.CI_PAT }}@github.com\//") 18 | done 19 | git submodule sync 20 | git submodule update --init --recursive 21 | - name: Deploy docs 22 | uses: mhausenblas/mkdocs-deploy-gh-pages@master 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "amundsendatabuilder"] 2 | path = amundsendatabuilder 3 | url = https://github.com/amundsen-io/amundsendatabuilder 4 | branch = master 5 | [submodule "amundsenfrontendlibrary"] 6 | path = amundsenfrontendlibrary 7 | url = https://github.com/amundsen-io/amundsenfrontendlibrary 8 | branch = master 9 | [submodule "amundsenmetadatalibrary"] 10 | path = amundsenmetadatalibrary 11 | url = https://github.com/amundsen-io/amundsenmetadatalibrary 12 | branch = master 13 | [submodule "amundsensearchlibrary"] 14 | path = amundsensearchlibrary 15 | url = https://github.com/amundsen-io/amundsensearchlibrary 16 | branch = master 17 | [submodule "amundsencommon"] 18 | path = amundsencommon 19 | url = https://github.com/amundsen-io/amundsencommon 20 | branch = master 21 | [submodule "amundsengremlin"] 22 | path = amundsengremlin 23 | url = https://github.com/amundsen-io/amundsengremlin 24 | branch = master 25 | -------------------------------------------------------------------------------- /OWNERS.md: -------------------------------------------------------------------------------- 1 | * See [CONTRIBUTING.md](CONTRIBUTING.md) for general contribution guidelines. 2 | * See [GOVERNANCE.md](GOVERNANCE.md) for governance guidelines. 3 | 4 | This page lists all the maintainers for Amundsen. This can be used for 5 | routing PRs, questions, etc. to the right place. 
6 | 7 | # Amundsen committers 8 | - Tao Feng (https://github.com/feng-tao) 9 | - Jin Hyuk Chang (https://github.com/jinhyukchang) 10 | - Tamika Tannis (https://github.com/ttannis) 11 | - Daniel Won (https://github.com/danwom) 12 | - Marcos Iglesias (https://github.com/golodhros) 13 | - Diksha Thakur (https://github.com/dikshathakur3119) 14 | - Allison Suarez Miranda (https://github.com/allisonsuarez) 15 | - Shenghu Yang (https://github.com/shenghuy) 16 | - Mark Grover (https://github.com/markgrover) 17 | - Verdan Mahmood (https://github.com/verdan) 18 | - Bolke de Bruin (https://github.com/bolkedebruin) 19 | - Mariusz Gorski (https://github.com/mgorsk1) 20 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | If you think you have found a security vulnerability, please send a report to amundsen-security@lists.lfaidata.foundation. Please do not post security vulnerabilities on Slack. 5 | 6 | We don't currently have a PGP key, unfortunately. 7 | 8 | An Amundsen committer will send you a response indicating the next steps in handling your report. After the initial reply to your report, the committer will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance. 9 | 10 | *Important:* Please don't disclose the vulnerability before it has been fixed and announced, to protect our users. 11 | 12 | ## Security announcements 13 | 14 | Please subscribe to [the announcements mailing list](https://lists.lfai.foundation/g/amundsen-announce), where we post notifications and remediation details for security vulnerabilities. 15 | -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": ["CONTRIBUTORS.md"], 3 | "imageSize": 150, 4 | "commit": false, 5 | "contributors": [ 6 | { 7 | "login": "ttannis", 8 | "name": "Tamika Tannis", 9 | "avatar_url": "https://avatars2.githubusercontent.com/u/1790900?v=4", 10 | "profile": "https://www.linkedin.com/in/tamika-tannis/", 11 | "contributions": [ 12 | "bug", 13 | "code", 14 | "content", 15 | "doc", 16 | "example", 17 | "ideas", 18 | "infra", 19 | "maintenance", 20 | "platform", 21 | "plugin", 22 | "projectManagement", 23 | "question", 24 | "review", 25 | "security", 26 | "tool", 27 | "test", 28 | "tutorial" 29 | ] } 30 | ], 31 | "contributorsPerLine": 6, 32 | "projectName": "amundsen", 33 | "projectOwner": "amundsen-io", 34 | "repoType": "github", 35 | "repoHost": "https://github.com", 36 | "skipCi": true 37 | } 38 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-search.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "amundsen.name" . }}-{{ .Values.search.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: {{ .Chart.Name }}-{{ .Values.search.serviceName }} 9 | component: {{ .Values.search.serviceName }} 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.search.annotations }} 14 | annotations: 15 | {{ toYaml .
| indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.search.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . }} 21 | component: {{ .Values.search.serviceName }} 22 | release: {{ .Release.Name }} 23 | ports: 24 | - name: {{ .Chart.Name }}-{{ .Values.search.serviceName }}-{{ .Values.environment }}-http 25 | port: 5001 26 | targetPort: 5001 27 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-frontend.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "amundsen.name" . }}-{{ .Values.frontEnd.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: {{ .Chart.Name }}-{{ .Values.frontEnd.serviceName }} 9 | component: {{ .Values.frontEnd.serviceName }} 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.frontEnd.annotations }} 14 | annotations: 15 | {{ toYaml . | indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.frontEnd.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . }} 21 | component: {{ .Values.frontEnd.serviceName }} 22 | release: {{ .Release.Name }} 23 | ports: 24 | - name: {{ .Chart.Name }}-{{ .Values.frontEnd.serviceName }}-{{ .Values.environment }}-http 25 | port: 5000 26 | targetPort: 5000 27 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-metadata.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "amundsen.name" . }}-{{ .Values.metadata.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: {{ .Chart.Name }}-{{ .Values.metadata.serviceName }} 9 | component: {{ .Values.metadata.serviceName }} 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.metadata.annotations }} 14 | annotations: 15 | {{ toYaml . | indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.metadata.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . 
}} 21 | component: {{ .Values.metadata.serviceName }} 22 | release: {{ .Release.Name }} 23 | ports: 24 | - name: {{ .Chart.Name }}-{{ .Values.metadata.serviceName }}-{{ .Values.environment }}-http 25 | port: 5002 26 | targetPort: 5002 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Create a feature request 4 | title: Feature Proposal 5 | labels: feature proposal 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | 13 | ## Expected Behavior or Use Case 14 | 15 | 16 | ## Service or Ingestion ETL 17 | 18 | 19 | ## Possible Implementation 20 | 21 | 22 | ## Example Screenshots (if appropriate): 23 | 24 | ## Context 25 | 26 | -------------------------------------------------------------------------------- /deploy_website.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | 3 | #REPO="git@github.com:amundsen-io/amundsen.git" 4 | #DIR=temp-clone 5 | # We use https://www.mkdocs.org/user-guide/deploying-your-docs/ to build/deploy docs 6 | # Currently the doc is built/deployed manually. We should first build and deploy locally and verify it. 7 | # Here are some basic steps: 8 | # 1. virtualenv venv 9 | # 2. source venv/bin/activate 10 | # 3. pip3 install -r requirements.txt 11 | # 4. brew install mkdocs 12 | # 5. mkdocs serve # build locally and serve it in localhost:8000 . On mac OS, you may face ImportError and you may need to downgrade openssl by $ brew switch openssl 1.0.2r 13 | # 6. mkdocs gh-deploy # deploy to gh page 14 | 15 | # Delete any existing temporary website clone. 16 | #rm -rf $DIR 17 | 18 | # Clone the current repo into temp folder. 19 | #git clone $REPO $DIR 20 | 21 | # Move working directory into temp folder. 22 | #cd $DIR 23 | 24 | # Build the site and push the new files up to GitHub. 25 | mkdocs gh-deploy 26 | git checkout gh-pages 27 | git push 28 | 29 | # Delete our temp folder. 30 | #cd .. 31 | #rm -rf $DIR 32 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.neo4j.enabled }} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: neo4j 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: neo4j 9 | component: neo4j 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.neo4j.annotations }} 14 | annotations: 15 | {{ toYaml . | indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.neo4j.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . 
}} 21 | component: neo4j 22 | release: {{ .Release.Name }} 23 | ports: 24 | - port: 7473 25 | name: neo4j-{{ .Values.environment }}-https 26 | targetPort: 7473 27 | - port: 7474 28 | name: neo4j-{{ .Values.environment }}-http 29 | targetPort: 7474 30 | - port: 7687 31 | name: neo4j-{{ .Values.environment }}-bolt 32 | targetPort: 7687 33 | - port: 1337 34 | name: neo4j-{{ .Values.environment }}-shell 35 | targetPort: 1337 36 | {{ end }} 37 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | ### Summary of Changes 20 | 21 | 22 | 23 | ### Documentation 24 | 25 | 26 | 27 | ### CheckList 28 | 29 | Make sure you have checked **all** steps below to ensure a timely review. 30 | 31 | - [ ] PR title addresses the issue accurately and concisely, including a title prefix. 32 | - [ ] PR includes a summary of changes. 33 | - [ ] My commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git-commit/)" 34 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/secret-oidc-config.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.frontEnd.createOidcSecret }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: oidc-config 6 | namespace: {{ .Release.Namespace }} 7 | stringData: 8 | OIDC_CLIENT_SECRET: {{ .Values.frontEnd.OIDC_CLIENT_SECRET }} 9 | client_secrets.json: |- 10 | { 11 | "web": { 12 | "client_id": "{{ .Values.frontEnd.OIDC_CLIENT_ID }}", 13 | "client_secret": "{{ .Values.frontEnd.OIDC_CLIENT_SECRET }}", 14 | "auth_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/authorize", 15 | "token_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/token", 16 | "issuer": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}", 17 | "userinfo_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/userinfo", 18 | "redirect_uris": [ 19 | "http://localhost/oidc_callback" 20 | ], 21 | "token_introspection_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/introspect" 22 | } 23 | } 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a bug report 4 | title: Bug Report 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | 13 | ## Expected Behavior 14 | 15 | 16 | ## Current Behavior 17 | 18 | 19 | ## Possible Solution 20 | 21 | 22 | ## Steps to Reproduce 23 | 24 | 25 | 1. 26 | 2. 27 | 3. 28 | 4. 
29 | 30 | ## Screenshots (if appropriate) 31 | 32 | ## Context 33 | 34 | 35 | 36 | ## Your Environment 37 | 38 | * Amundsen version used: 39 | * Data warehouse stores: 40 | * Deployment (k8s or native): 41 | * Link to your fork or repository: -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/restore-backup/restore-neo4j-pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: restore-neo4j-from-latest 5 | annotations: 6 | fill_in_here_with_correct_key: fill_in_here_with_correct_value 7 | spec: 8 | containers: 9 | - name: restore-neo4j-from-latest 10 | image: neo4j:3.3.0 11 | command: 12 | - "/bin/sh" 13 | - "-c" 14 | - | 15 | apk -v --update add --no-cache --quiet curl python py-pip && pip install awscli -q 16 | latest_backup=$(aws s3api list-objects-v2 --bucket "$BUCKET" --query 'reverse(sort_by(Contents, &LastModified))[:1].Key' --output=text) 17 | aws s3 cp s3://$BUCKET/$latest_backup /tmp 18 | tar -xf /tmp/$latest_backup -C / 19 | data_file=`ls /data|grep \.data` 20 | schema_file=`ls /data|grep \.schema` 21 | ./bin/neo4j-shell -host neo4j -file /data/$schema_file 22 | echo "CALL apoc.import.graphml('/data/$data_file', {useTypes: true, readLabels: true});" | /var/lib/neo4j/bin/neo4j-shell -host neo4j 23 | env: 24 | - name: BUCKET 25 | value: s3://dev/null 26 | volumeMounts: 27 | - name: data 28 | mountPath: /data 29 | restartPolicy: OnFailure 30 | volumes: 31 | - name: data 32 | persistentVolumeClaim: 33 | claimName: neo4j-pvc 34 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "amundsen.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "amundsen.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "amundsen.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "amundsen.labels" -}} 38 | app.kubernetes.io/name: {{ include "amundsen.name" . }} 39 | helm.sh/chart: {{ include "amundsen.chart" .
}} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | {{- end -}} 46 | -------------------------------------------------------------------------------- /docs/tutorials/index-postgres.md: -------------------------------------------------------------------------------- 1 | # How to index metadata for real life databases 2 | 3 | In the previous [doc](../installation.md), we indexed tables from a CSV file. In real production cases, 4 | the table metadata is stored in data warehouses (e.g. Hive, Postgres, MySQL, Snowflake, BigQuery, etc.) for which Amundsen 5 | provides extractors for metadata extraction. 6 | 7 | In this tutorial, we use Postgres as an example to walk through how to index metadata for a Postgres database. 8 | The doc won't cover how to set up a Postgres database. 9 | 10 | 1. In this example, we have a Postgres table named `films` in a local Postgres instance. 11 |  12 | 13 | 2. We leverage the [postgres metadata extractor](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/extractor/postgres_metadata_extractor.py) 14 | to extract the metadata of the Postgres database. We can call the metadata extractor 15 | from an ad hoc Python script, as in this [example](https://github.com/amundsen-io/amundsendatabuilder/pull/248/commits/f5064e58a19a5bfa380b333cfc657ebb34702a2c), 16 | or from an Airflow DAG. 17 | 18 | 3. Once we run the script, we can search for the `films` table using Amundsen Search. 19 |  20 | 21 | 4. We can also find and view the `films` table on the table detail page. 22 |  23 | 24 | This tutorial uses Postgres as an example, but you can apply the same approach to your other data warehouses. If Amundsen 25 | doesn't provide an extractor for your source, you can build one based on the API and contribute it back to us! 26 | -------------------------------------------------------------------------------- /docs/tutorials/user-profiles.md: -------------------------------------------------------------------------------- 1 | 2 | ## People resources 3 | 4 | ### What can I do with User Resources? 5 | User profile pages, along with the ability to bookmark/favorite and search for users, are also available. See a demo of what they feel like from an end-user viewpoint starting around the 36-minute mark of [this September 2019 talk](https://youtu.be/Gr3-RfWn49A?t=36m00s) - this video snippet can effectively serve as an end-user guide. 6 | 7 | ### How do I enable User pages? 8 | 9 | The configuration to have `Users` available consists of: 10 | 11 | 1. Enable the user profile page indexing and display feature by performing [this frontend configuration](../../frontend/docs/application_config#index-users) 12 | 13 | 2. There are two alternative ways to populate user profile data. You can either: 14 | 15 | * Configure the Metadata service to do a [live lookup](../../metadata/docs/configurations#user_detail_method-optional) in some directory service, like LDAP or an HR system. 16 | 17 | * Set up ongoing ingestion of user profile data into Neo4j and Elasticsearch as users onboard/change/offboard, effectively caching it with the pros/cons of that (similar to what the Databuilder sample loader does from a user CSV; see the “pre-cooked demo data” link in the [Architecture overview](../../architecture#databuilder)) 18 | 19 | !!!
note 20 | Currently, for both of these options Amundsen _only_ provides these hooks/interfaces to add your own implementation. If you build something you think is generally useful, contributions are welcome! 21 | 22 | 3. Configure login, according to the [Authentication guide](../../authentication/oidc) 23 | 24 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/configmap-neo4j.yaml: -------------------------------------------------------------------------------- 1 | 2 | {{ if .Values.neo4j.enabled }} 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: neo4j-configmap 7 | labels: 8 | app: {{ template "amundsen.name" . }} 9 | component: neo4j 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | data: 14 | neo4j.conf: |- 15 | apoc.export.file.enabled=true 16 | apoc.import.file.enabled=true 17 | cypher.forbid_shortestpath_common_nodes=false 18 | dbms.connector.bolt.enabled=true 19 | dbms.connector.bolt.listen_address=:7687 20 | dbms.connector.bolt.tls_level=OPTIONAL 21 | dbms.connector.http.enabled=true 22 | dbms.connector.https.enabled=true 23 | dbms.connectors.default_listen_address=0.0.0.0 24 | dbms.directories.import=/mnt 25 | dbms.jvm.additional=-Djdk.tls.ephemeralDHKeySize=2048 26 | dbms.jvm.additional=-Dunsupported.dbms.udc.source=tarball 27 | dbms.jvm.additional=-XX:+AlwaysPreTouch 28 | dbms.jvm.additional=-XX:+DisableExplicitGC 29 | dbms.jvm.additional=-XX:+UseG1GC 30 | dbms.logs.query.enabled=true 31 | dbms.logs.query.rotation.keep_number=7 32 | dbms.logs.query.rotation.size=20m 33 | dbms.memory.heap.initial_size={{ .Values.neo4j.config.dbms.heap_initial_size }} 34 | dbms.memory.heap.max_size={{ .Values.neo4j.config.dbms.heap_max_size }} 35 | dbms.memory.pagecache.size={{ .Values.neo4j.config.dbms.pagecache_size }} 36 | dbms.security.allow_csv_import_from_file_urls=true 37 | dbms.security.auth_enabled=false 38 | dbms.security.procedures.unrestricted=algo.*,apoc.* 39 | dbms.shell.enabled=true 40 | dbms.shell.host=0.0.0.0 41 | dbms.windows_service_name=neo4j 42 | {{ end }} 43 | -------------------------------------------------------------------------------- /docker-amundsen-atlas.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | atlas: 4 | # the image comes from https://github.com/ing-bank/rokku-dev-apache-atlas 5 | # it might take some time for Atlas to start 6 | image: wbaa/rokku-dev-apache-atlas:0.1.5 7 | container_name: atlas 8 | ports: 9 | - 21000:21000 10 | networks: 11 | - amundsennet 12 | environment: 13 | - ATLAS_KICKSTART_AMUNDSEN=true 14 | amundsensearch: 15 | build: 16 | context: ./amundsensearchlibrary 17 | dockerfile: public.Dockerfile 18 | container_name: amundsensearch 19 | ports: 20 | - 5001:5001 21 | environment: 22 | - CREDENTIALS_PROXY_USER=admin 23 | - CREDENTIALS_PROXY_PASSWORD=admin 24 | - PROXY_ENDPOINT=http://atlas:21000 25 | - PROXY_CLIENT=ATLAS 26 | networks: 27 | - amundsennet 28 | amundsenmetadata: 29 | build: 30 | context: ./amundsenmetadatalibrary 31 | dockerfile: public.Dockerfile 32 | container_name: amundsenmetadata 33 | ports: 34 | - 5002:5002 35 | networks: 36 | - amundsennet 37 | environment: 38 | - CREDENTIALS_PROXY_USER=admin 39 | - CREDENTIALS_PROXY_PASSWORD=admin 40 | - PROXY_HOST=http://atlas 41 | - PROXY_PORT=21000 42 | - PROXY_CLIENT=ATLAS 43 | amundsenfrontend: 44 | build: 45 | context: ./amundsenfrontendlibrary 46 | 
args: 47 | SEARCHSERVICE_BASE: http://amundsensearch:5001 48 | METADATASERVICE_BASE: http://amundsenmetadata:5002 49 | dockerfile: local.Dockerfile 50 | container_name: amundsenfrontend 51 | depends_on: 52 | - amundsenmetadata 53 | - amundsensearch 54 | ports: 55 | - 5000:5000 56 | networks: 57 | - amundsennet 58 | environment: 59 | - METADATASERVICE_BASE=http://amundsenmetadata:5002 60 | - SEARCHSERVICE_BASE=http://amundsensearch:5001 61 | networks: 62 | amundsennet: 63 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | Here is a list (WIP) of contributors to Amundsen, listing their contributions following this [emoji system](https://allcontributors.org/docs/en/emoji-key). This list is in progress, so feel free to use the [All Contributors bot](https://allcontributors.org/docs/en/bot/usage) to update it with the missing contributors! 4 | 5 | 6 | 7 | 8 | 9 | 10 | Tamika Tannis🐛 💻 🖋 📖 💡 🤔 🚇 🚧 📦 🔌 📆 💬 👀 🛡️ 🔧 ⚠️ ✅ 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /docker-amundsen-local.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | neo4j: 4 | image: neo4j:3.3.0 5 | container_name: neo4j_amundsen 6 | environment: 7 | - NEO4J_AUTH=neo4j/test 8 | ulimits: 9 | nofile: 10 | soft: 40000 11 | hard: 40000 12 | ports: 13 | - 7474:7474 14 | - 7687:7687 15 | volumes: 16 | - ./example/docker/neo4j/conf:/conf 17 | - ./.local/neo4j/data:/neo4j/data 18 | networks: 19 | - amundsennet 20 | elasticsearch: 21 | image: elasticsearch:6.7.0 22 | container_name: es_amundsen 23 | ports: 24 | - 9200:9200 25 | networks: 26 | - amundsennet 27 | ulimits: 28 | nofile: 29 | soft: 65536 30 | hard: 65536 31 | volumes: 32 | - ./.local/elasticsearch/data:/usr/share/elasticsearch/data 33 | amundsensearch: 34 | build: 35 | context: ./amundsensearchlibrary 36 | dockerfile: public.Dockerfile 37 | container_name: amundsensearch 38 | ports: 39 | - 5001:5001 40 | depends_on: 41 | - elasticsearch 42 | networks: 43 | - amundsennet 44 | environment: 45 | - PROXY_ENDPOINT=es_amundsen 46 | amundsenmetadata: 47 | build: 48 | context: ./amundsenmetadatalibrary 49 | dockerfile: public.Dockerfile 50 | container_name: amundsenmetadata 51 | depends_on: 52 | - neo4j 53 | ports: 54 | - 5002:5002 55 | networks: 56 | - amundsennet 57 | environment: 58 | - PROXY_HOST=bolt://neo4j_amundsen 59 | amundsenfrontend: 60 | build: 61 | context: ./amundsenfrontendlibrary 62 | args: 63 | SEARCHSERVICE_BASE: http://amundsensearch:5001 64 | METADATASERVICE_BASE: http://amundsenmetadata:5002 65 | dockerfile: local.Dockerfile 66 | container_name: amundsenfrontend 67 | depends_on: 68 | - amundsenmetadata 69 | - amundsensearch 70 | ports: 71 | - 5000:5000 72 | networks: 73 | - amundsennet 74 | environment: 75 | - SEARCHSERVICE_BASE=http://amundsensearch:5001 76 | - METADATASERVICE_BASE=http://amundsenmetadata:5002 77 | 78 | networks: 79 | amundsennet: 80 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | The following diagram shows the overall architecture for Amundsen. 
4 |  5 | 6 | ## Frontend 7 | 8 | The [frontend service](https://github.com/amundsen-io/amundsenfrontendlibrary#amundsen-frontend-service) serves as the web UI portal for user interaction. 9 | It is a Flask-based web app whose presentation layer is built with React, Redux, Bootstrap, Webpack, and Babel. 10 | 11 | ## Search 12 | 13 | The [search service](https://github.com/amundsen-io/amundsensearchlibrary#amundsen-search-service) proxy leverages Elasticsearch's search functionality (or Apache Atlas's search API, if that's the backend you picked) and 14 | provides a RESTful API to serve search requests from the frontend service. This [API is documented and live explorable](https://github.com/amundsen-io/amundsensearchlibrary#api-documentation) through OpenAPI, aka "Swagger". 15 | Currently only [table resources](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/models/elasticsearch_document.py) are indexed and searchable. 16 | The search index is built with the [databuilder elasticsearch publisher](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/publisher/elasticsearch_publisher.py). 17 | 18 | ## Metadata 19 | 20 | The [metadata service](https://github.com/amundsen-io/amundsenmetadatalibrary#amundsen-metadata-service) currently uses a Neo4j proxy to interact with the Neo4j graph database and serves metadata to the frontend service. 21 | The metadata is represented as a graph model: 22 |  23 | The above diagram shows how metadata is modeled in Amundsen. 24 | 25 | ## Databuilder 26 | 27 | Amundsen provides a [data ingestion library](https://github.com/amundsen-io/amundsendatabuilder) for building the metadata. At Lyft, we build the metadata once a day 28 | using an Airflow DAG ([examples](https://github.com/amundsen-io/amundsendatabuilder/tree/master/example/dags)). 29 | 30 | In addition to "real" use, the databuilder is also employed as a handy tool to ingest some ["pre-cooked" demo data](https://github.com/amundsen-io/amundsendatabuilder/blob/master/example/sample_data/) used in the Quickstart guide. This gives you a small sample of data to explore, so many of Amundsen's features are lit up without you even having to set up any connections to databases to ingest real data. 31 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-search.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ template "amundsen.fullname" . }}-{{ .Values.search.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: {{ .Values.search.serviceName }} 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | selector: 14 | matchLabels: 15 | app: {{ template "amundsen.name" . }} 16 | component: {{ .Values.search.serviceName }} 17 | release: {{ .Release.Name }} 18 | replicas: {{ default 1 .Values.search.replicas }} 19 | template: 20 | metadata: 21 | {{- with default .Values.podAnnotations .Values.search.podAnnotations }} 22 | annotations: 23 | {{ toYaml . | indent 8 }} 24 | {{- end }} 25 | labels: 26 | app: {{ template "amundsen.name" . }} 27 | component: {{ .Values.search.serviceName }} 28 | release: {{ .Release.Name }} 29 | spec: 30 | {{- with default .Values.nodeSelector .Values.search.nodeSelector }} 31 | nodeSelector: 32 | {{ toYaml .
| indent 8 }} 33 | {{- end }} 34 | {{- with default .Values.affinity .Values.search.affinity }} 35 | affinity: 36 | {{ toYaml . | indent 8 }} 37 | {{- end }} 38 | {{- with default .Values.tolerations .Values.search.tolerations }} 39 | tolerations: 40 | {{ toYaml . | indent 8 }} 41 | {{- end }} 42 | containers: 43 | - name: {{ .Chart.Name }}-{{ .Values.search.serviceName }} 44 | image: {{ .Values.search.image }}:{{ .Values.search.imageTag }} 45 | ports: 46 | - containerPort: 5001 47 | env: 48 | - name: PROXY_ENDPOINT 49 | value: {{ if .Values.search.elasticsearchEndpoint }}{{ .Values.search.elasticsearchEndpoint }}{{ else }}{{ .Release.Name }}-elasticsearch-client.{{ .Release.Namespace }}.svc.cluster.local{{ end }} 50 | livenessProbe: 51 | httpGet: 52 | path: "/healthcheck" 53 | port: 5001 54 | initialDelaySeconds: 60 55 | periodSeconds: 60 56 | timeoutSeconds: 1 57 | successThreshold: 1 58 | failureThreshold: 5 59 | {{- with .Values.search.resources }} 60 | resources: 61 | {{ toYaml . | indent 10 }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_logo_on_light.svg: -------------------------------------------------------------------------------- 1 | amundsen_logo_on_light -------------------------------------------------------------------------------- /docs/img/logos/amundsen_logo_on_blue.svg: -------------------------------------------------------------------------------- 1 | amundsen_logo_on_blue -------------------------------------------------------------------------------- /docker-amundsen.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | neo4j: 4 | image: neo4j:3.3.0 5 | container_name: neo4j_amundsen 6 | environment: 7 | - NEO4J_AUTH=neo4j/test 8 | ulimits: 9 | nofile: 10 | soft: 40000 11 | hard: 40000 12 | ports: 13 | - 7474:7474 14 | - 7687:7687 15 | volumes: 16 | - ./example/docker/neo4j/conf:/conf 17 | - ./example/docker/neo4j/plugins:/plugins 18 | - ./example/backup:/backup 19 | - neo4j_data:/neo4j/data 20 | networks: 21 | - amundsennet 22 | elasticsearch: 23 | image: elasticsearch:6.7.0 24 | container_name: es_amundsen 25 | ports: 26 | - 9200:9200 27 | volumes: 28 | - es_data:/usr/share/elasticsearch/data 29 | networks: 30 | - amundsennet 31 | ulimits: 32 | nofile: 33 | soft: 65536 34 | hard: 65536 35 | amundsensearch: 36 | image: amundsendev/amundsen-search:2.4.1 37 | container_name: amundsensearch 38 | ports: 39 | - 5001:5000 40 | depends_on: 41 | - elasticsearch 42 | networks: 43 | - amundsennet 44 | environment: 45 | - PROXY_ENDPOINT=es_amundsen 46 | command: gunicorn -w 2 --bind :5000 search_service.search_wsgi 47 | amundsenmetadata: 48 | image: amundsendev/amundsen-metadata:3.0.0 49 | container_name: amundsenmetadata 50 | depends_on: 51 | - neo4j 52 | ports: 53 | - 5002:5000 54 | networks: 55 | - amundsennet 56 | environment: 57 | - PROXY_HOST=bolt://neo4j_amundsen 58 | command: gunicorn -w 2 --bind :5000 metadata_service.metadata_wsgi 59 | amundsenfrontend: 60 | image: amundsendev/amundsen-frontend:3.0.0 61 | container_name: amundsenfrontend 62 | depends_on: 63 | - amundsenmetadata 64 | - amundsensearch 65 | ports: 66 | - 5000:5000 67 | networks: 68 | - amundsennet 69 | environment: 70 | - SEARCHSERVICE_BASE=http://amundsensearch:5000 71 | - METADATASERVICE_BASE=http://amundsenmetadata:5000 72 | # Only for easy config-less Quickstart bookmark evalutation. 
`TestConfig` extends ordinary `LocalConfig` by 73 | # defining `AUTH_USER_METHOD` to a hardcoded dummy user in `amundsen_application.tests.test_utils.get_test_user()` 74 | # See further docs in https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/configuration.md#flask 75 | # and https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/configuration.md#authentication 76 | - FRONTEND_SVC_CONFIG_MODULE_CLASS=amundsen_application.config.TestConfig 77 | command: gunicorn -w 2 --bind :5000 amundsen_application.wsgi 78 | 79 | networks: 80 | amundsennet: 81 | 82 | volumes: 83 | es_data: 84 | neo4j_data: 85 | -------------------------------------------------------------------------------- /docs/installation-aws-ecs/aws-ecs-deployment.md: -------------------------------------------------------------------------------- 1 | # Deployment of non-production Amundsen on AWS ECS using aws-cli 2 | 3 | The following is a set of instructions to run Amundsen on AWS Elastic Container Service. The current configuration is very basic but it is working. It is a migration of the docker-amundsen.yml to run on AWS ECS. 4 | 5 | ## Install ECS CLI 6 | 7 | The first step is to install ECS CLI, please follow the instructions from AWS [documentation](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_CLI_installation.html) 8 | 9 | ### Get your access and secret keys from IAM 10 | 11 | ```bash 12 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 13 | $ export AWS_ACCESS_KEY_ID=xxxxxxxx 14 | $ export AWS_SECRET_ACCESS_KEY=xxxxxx 15 | $ export AWS_PROFILE=profilename 16 | ``` 17 | 18 | For the purpose of this instruction we used the [tutorial](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-cli-tutorial-ec2.html#ECS_CLI_tutorial_compose_create) on AWS documentation 19 | 20 | 21 | ## STEP 1: Create a cluster configuration: 22 | 23 | ```bash 24 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 25 | $ ecs-cli configure --cluster amundsen --region us-west-2 --default-launch-type EC2 --config-name amundsen 26 | ``` 27 | 28 | ### STEP 2: Create a profile using your access key and secret key: 29 | 30 | ```bash 31 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 32 | $ ecs-cli configure profile --access-key $AWS_ACCESS_KEY_ID --secret-key $AWS_SECRET_ACCESS_KEY --profile-name amundsen 33 | ``` 34 | 35 | ### STEP 3: Create the Cluster Use profile name from \~/.aws/credentials 36 | 37 | ```bash 38 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 39 | $ ecs-cli up --keypair JoaoCorreia --extra-user-data userData.sh --capability-iam --size 1 --instance-type t2.large --cluster-config amundsen --verbose --force --aws-profile $AWS_PROFILE 40 | ``` 41 | 42 | ### STEP 4: Deploy the Compose File to a Cluster 43 | 44 | ```bash 45 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 46 | $ ecs-cli compose --cluster-config amundsen --file docker-ecs-amundsen.yml up --create-log-groups 47 | ``` 48 | 49 | You can use the ECS CLI to see what tasks are running. 
50 | 51 | ```bash 52 | $ ecs-cli ps 53 | ``` 54 | 55 | ### STEP 5: Open the EC2 Instance 56 | 57 | Edit the Security Group to allow traffic from your IP. You should then be able to see the frontend, Elasticsearch and Neo4j by visiting the URLs: 58 | 59 | - http://xxxxxxx:5000/ 60 | - http://xxxxxxx:9200/ 61 | - http://xxxxxxx:7474/browser/ 62 | 63 | ## TODO 64 | 65 | - Configuration sent to services not working properly (amundsen.db vs graph.db) 66 | - Create a persistent volume for graph/metadata storage. [See this](https://aws.amazon.com/blogs/compute/amazon-ecs-and-docker-volume-drivers-amazon-ebs/) 67 | - Refactor the VPC and default security group permissions 68 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | ## Reporting an Issue 4 | 5 | The easiest way you can contribute to Amundsen is by creating issues. For that, please use the [issues][issues] section of the Amundsen repository and search for a similar problem. If you don't find it, submit your bug, question, proposal or feature request. 6 | 7 | In the case of bugs, please be descriptive and, if possible, include a screenshot of the issue. 8 | 9 | ## Creating Pull Requests 10 | 11 | Before sending a Pull Request with significant changes, please use the [issue tracker][issues] to discuss the potential improvements you want to make. 12 | 13 | ## First-Time Contributors 14 | 15 | If this is your first contribution to open source, you can [follow this tutorial][contributionTutorial] or check [this video series][contributionVideos] to learn about the contribution workflow with GitHub. 16 | 17 | We always have tickets labeled ['good first issue'][goodFirstIssues] and ['help wanted'][helpWantedIssues]. These are a great starting point if you want to contribute. Don't hesitate to ask questions about the issue if you are not sure about the strategy to follow. 18 | 19 | ## Requesting a Feature 20 | 21 | We have created a [Roadmap][roadmap] document with our plans for the next releases; however, we are open to hearing your ideas for new features! 22 | 23 | For that, you can create an issue and select the "Feature Proposal" template. Fill in as much information as possible, and if you can, add responses to the following questions: 24 | 25 | - Will we need to add a new model or change any existing models? 26 | - What would the Migration Plan look like? Will it be backwards-compatible? 27 | - Which alternatives did you consider? 28 | 29 | ## Setup 30 | 31 | To start contributing to Amundsen, you need to set up your machine to develop with the project. For that, we have prepared a [Developer Guide][developerGuide] that will guide you through setting up your environment to develop locally with Amundsen. 32 | 33 | ## Get Recognition 34 | 35 | You can add yourself or somebody else to the contributors list by using the [All Contributors bot][allContributorsBot]. 36 | 37 | ## Next Steps 38 | 39 | Once you have your environment set up and ready to go, you can check our [documentation][documentationHomepage] and the project's [Roadmap][roadmap] to see what's coming.
40 | 41 | [issues]: https://github.com/amundsen-io/amundsen/issues 42 | [allContributorsBot]: https://allcontributors.org/docs/en/bot/usage 43 | [contributionTutorial]: https://github.com/firstcontributions/first-contributions#first-contributions 44 | [contributionVideos]: https://egghead.io/courses/how-to-contribute-to-an-open-source-project-on-github 45 | [goodFirstIssues]: https://github.com/amundsen-io/amundsen/labels/good%20first%20issue 46 | [helpWantedIssues]: https://github.com/amundsen-io/amundsen/labels/help%20wanted 47 | [developerGuide]: https://www.amundsen.io/amundsen/developer_guide/ 48 | [roadmap]: https://www.amundsen.io/amundsen/roadmap/ 49 | [documentationHomepage]: https://www.amundsen.io/amundsen/ 50 | -------------------------------------------------------------------------------- /docs/installation-aws-ecs/docker-ecs-amundsen.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | neo4j: 4 | image: neo4j:3.3.0 5 | container_name: neo4j_amundsen 6 | environment: 7 | - NEO4J_AUTH=neo4j/test 8 | # These dont seem to be working though! 9 | - NEO4J_dbms.active_database=amundsen.db 10 | - NEO4J_dbms.directories.data=/neo4j/data 11 | - NEO4J_dbms.directories.logs=/var/log/neo4j 12 | - NEO4J_dbms.directories.import=/var/lib/neo4j/import 13 | - NEO4J_dbms.security.auth_enabled=false 14 | - NEO4J_dbms.connectors.default_listen_address=0.0.0.0 15 | ulimits: 16 | nofile: 17 | soft: 40000 18 | hard: 40000 19 | ports: 20 | - 7474:7474 21 | - 7687:7687 22 | logging: 23 | driver: awslogs 24 | options: 25 | awslogs-group: amundsen-neo4j 26 | awslogs-region: us-west-2 27 | awslogs-stream-prefix: amundsen-neo4j 28 | 29 | elasticsearch: 30 | image: elasticsearch:6.7.0 31 | container_name: es_amundsen 32 | ports: 33 | - 9200:9200 34 | ulimits: 35 | nofile: 36 | soft: 65536 37 | hard: 65536 38 | logging: 39 | driver: awslogs 40 | options: 41 | awslogs-group: amundsen-elasticsearch 42 | awslogs-region: us-west-2 43 | awslogs-stream-prefix: amundsen-elasticsearch 44 | 45 | amundsensearch: 46 | image: amundsendev/amundsen-search:1.1.1 47 | container_name: amundsensearch 48 | ports: 49 | - 5001:5000 50 | depends_on: 51 | - elasticsearch 52 | environment: 53 | - PROXY_ENDPOINT=es_amundsen 54 | logging: 55 | driver: awslogs 56 | options: 57 | awslogs-group: amundsensearch 58 | awslogs-region: us-west-2 59 | awslogs-stream-prefix: amundsensearch 60 | 61 | amundsenmetadata: 62 | image: amundsendev/amundsen-metadata:1.0.7 63 | container_name: amundsenmetadata 64 | depends_on: 65 | - neo4j 66 | ports: 67 | - 5002:5000 68 | environment: 69 | - PROXY_HOST=bolt://neo4j_amundsen 70 | logging: 71 | driver: awslogs 72 | options: 73 | awslogs-group: amundsenmetadata 74 | awslogs-region: us-west-2 75 | awslogs-stream-prefix: amundsenmetadata 76 | 77 | amundsenfrontend: 78 | image: amundsendev/amundsen-frontend:1.0.5 79 | container_name: amundsenfrontend 80 | depends_on: 81 | - amundsenmetadata 82 | - amundsensearch 83 | ports: 84 | - 5000:5000 85 | environment: 86 | - SEARCHSERVICE_BASE=http://amundsensearch:5000 87 | - METADATASERVICE_BASE=http://amundsenmetadata:5000 88 | logging: 89 | driver: awslogs 90 | options: 91 | awslogs-group: amundsenfrontend 92 | awslogs-region: us-west-2 93 | awslogs-stream-prefix: amundsenfrontend 94 | 95 | 96 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/cronjob-neo4j-s3-backup.yaml: 
-------------------------------------------------------------------------------- 1 | {{ if and .Values.neo4j.enabled (and .Values.neo4j.backup.enabled .Values.neo4j.backup.s3Path .Values.neo4j.persistence) }} 2 | apiVersion: batch/v1beta1 3 | kind: CronJob 4 | metadata: 5 | name: neo4j-s3-backup 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: neo4j-s3-backup 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | schedule: {{ .Values.neo4j.backup.schedule | quote }} 14 | concurrencyPolicy: Forbid 15 | jobTemplate: 16 | spec: 17 | template: 18 | metadata: 19 | labels: 20 | app: {{ template "amundsen.name" . }} 21 | component: neo4j-s3-backup 22 | release: {{ .Release.Name }} 23 | {{- with .Values.neo4j.backup.podAnnotations }} 24 | annotations: 25 | {{ toYaml . | indent 12 }} 26 | {{- end }} 27 | spec: 28 | restartPolicy: Never 29 | containers: 30 | - name: backup-neo4j 31 | image: neo4j:{{ .Values.neo4j.version }} 32 | command: 33 | - "/bin/sh" 34 | - "-c" 35 | - | 36 | apk -v --update add --no-cache --quiet curl python py-pip && 37 | pip install awscli -q && 38 | NOW="$(date "+%Y-%m-%d-%H:%M:%S")" && 39 | BACKUP_SCHEMA_NAME="graph.db-backup-$NOW.schema" && 40 | BACKUP_DATA_NAME="graph.db-backup-$NOW.data" && 41 | BACKUP_NAME="graph.db-backup-$NOW" && 42 | echo "CALL apoc.export.cypher.schema('/var/lib/neo4j/data/$BACKUP_SCHEMA_NAME', {});" | /var/lib/neo4j/bin/neo4j-shell -host neo4j && 43 | echo "CALL apoc.export.graphml.all('/var/lib/neo4j/data/$BACKUP_DATA_NAME', {useTypes: true, readLabels: true});" | /var/lib/neo4j/bin/neo4j-shell -host neo4j && 44 | printf "\nTarring -> /data/$BACKUP_SCHEMA_NAME and /data/$BACKUP_DATA_NAME to /data/$BACKUP_NAME.tar" && 45 | while [ ! -f /data/$BACKUP_DATA_NAME ]; do echo "backup data file does not exist: [/data/$BACKUP_DATA_NAME] sleeping..." && ls "/data/" && sleep 30; done && 46 | tar -cvf "/data/$BACKUP_NAME.tar" "/data/$BACKUP_SCHEMA_NAME" "/data/$BACKUP_DATA_NAME" && 47 | printf "\nZipping -> /data/$BACKUP_NAME.tar.gz\n" && 48 | gzip -9 "/data/$BACKUP_NAME.tar" && 49 | printf "Pushing /data/$BACKUP_NAME.tar.gz -> $BUCKET" && 50 | aws s3 cp "/data/$BACKUP_NAME.tar.gz" "$BUCKET" && 51 | printf "Cleaning up /data/graph.db-backup*" && 52 | rm /data/graph.db-backup* 53 | env: 54 | - name: BUCKET 55 | value: {{ .Values.neo4j.backup.s3Path }} 56 | volumeMounts: 57 | - name: data 58 | mountPath: /data 59 | volumes: 60 | - name: data 61 | persistentVolumeClaim: 62 | claimName: neo4j-pvc 63 | {{- end}} 64 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-metadata.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "amundsen.fullname" . }}-{{ .Values.metadata.serviceName }} 5 | labels: 6 | app: {{ template "amundsen.name" . }} 7 | component: {{ .Values.metadata.serviceName }} 8 | chart: {{ template "amundsen.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | spec: 12 | selector: 13 | matchLabels: 14 | app: {{ template "amundsen.name" . }} 15 | component: {{ .Values.metadata.serviceName }} 16 | release: {{ .Release.Name }} 17 | template: 18 | metadata: 19 | {{- with default .Values.podAnnotations .Values.metadata.podAnnotations }} 20 | annotations: 21 | {{ toYaml . 
| indent 8 }} 22 | {{- end }} 23 | labels: 24 | app: {{ template "amundsen.name" . }} 25 | component: {{ .Values.metadata.serviceName }} 26 | release: {{ .Release.Name }} 27 | spec: 28 | {{- with default .Values.nodeSelector .Values.metadata.nodeSelector }} 29 | nodeSelector: 30 | {{ toYaml . | indent 8 }} 31 | {{- end }} 32 | {{- with default .Values.affinity .Values.metadata.affinity }} 33 | affinity: 34 | {{ toYaml . | indent 8 }} 35 | {{- end }} 36 | {{- with default .Values.tolerations .Values.metadata.tolerations }} 37 | tolerations: 38 | {{ toYaml . | indent 8 }} 39 | {{- end }} 40 | volumes: 41 | {{- if .Values.frontEnd.oidcEnabled }} 42 | - name: oidc-config 43 | secret: 44 | secretName: oidc-config 45 | {{- end }} 46 | containers: 47 | - name: {{ .Chart.Name }}-{{ .Values.metadata.serviceName }} 48 | image: {{ .Values.metadata.image }}:{{ .Values.metadata.imageTag }} 49 | imagePullPolicy: Always 50 | ports: 51 | - containerPort: 5002 52 | env: 53 | - name: PROXY_HOST 54 | value: {{ if .Values.metadata.neo4jEndpoint }}{{ .Values.metadata.neo4jEndpoint }}{{ else }}bolt://neo4j.{{ .Release.Namespace }}.svc.cluster.local{{ end }} 55 | {{- if .Values.frontEnd.oidcEnabled }} 56 | - name: FLASK_OIDC_CLIENT_SECRETS 57 | value: /etc/client_secrets.json 58 | - name: FLASK_OIDC_SECRET_KEY 59 | valueFrom: 60 | secretKeyRef: 61 | name: oidc-config 62 | key: OIDC_CLIENT_SECRET 63 | {{- end }} 64 | livenessProbe: 65 | httpGet: 66 | path: "/healthcheck" 67 | port: 5002 68 | initialDelaySeconds: 60 69 | periodSeconds: 60 70 | timeoutSeconds: 1 71 | successThreshold: 1 72 | failureThreshold: 5 73 | volumeMounts: 74 | {{- if .Values.frontEnd.oidcEnabled }} 75 | - name: oidc-config 76 | mountPath: /etc/client_secrets.json 77 | subPath: client_secrets.json 78 | {{- end }} 79 | {{- with .Values.metadata.resources }} 80 | resources: 81 | {{ toYaml . | indent 10 }} 82 | {{- end }} 83 | -------------------------------------------------------------------------------- /docs/tutorials/badges.md: -------------------------------------------------------------------------------- 1 | # How to add table level and column level badges 2 | Amundsen supports use of clickable badges on tables, and non clickable badges for columns. Clickable badges trigger a search for all of the resources with the given badge name as a filter. 
3 | 4 |  5 | *Table badge* 6 | 7 |  8 | *Column badge* 9 | ## Badges configuration 10 | 11 | In order for Amundsen to accept new badges via metadata and to change their style in the UI, there are two configs that need to be set up: 12 | 13 | On the [amundsen metadata library](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/3c9a55e6af4cac9b342803c34cfe81851470e7f5/metadata_service/config.py) you should add your badges to the whitelist within your custom configuration file, following the format of this example: 14 | 15 | ``` 16 | # whitelist badges 17 | WHITELIST_BADGES: List[Badge] = [ 18 | Badge(badge_name='alpha', 19 | category='table_status'), 20 | Badge(badge_name='beta', 21 | category='table_status'), 22 | ] 23 | ``` 24 | 25 | In order to set up the color and display name on the [amundsen frontend library](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/static/js/config/config-custom.ts) you should add the desired badge styles as follows: 26 | 27 | ``` 28 | const configCustom: AppConfigCustom = { 29 | badges: { 30 | 'alpha': { 31 | style: BadgeStyle.DEFAULT, 32 | displayName: 'Alpha', 33 | }, 34 | 'partition column': { 35 | style: BadgeStyle.DEFAULT, 36 | displayName: 'Partition Column', 37 | }, 38 | } 39 | } 40 | ``` 41 | 42 | **Note:** any badges that are not defined in this configuration will show up with `BadgeStyle.DEFAULT`. 43 | 44 | 45 | ## Adding table badges through metadata library 46 | To manually add a badge to a particular table, the metadata API can be used. Here are the available requests: 47 | 48 | To add a badge on a table: 49 | ``` 50 | curl -X PUT https://{your metadata url}/table/{table key}/badge/{badge name}?category={badge category} 51 | ``` 52 | 53 | To delete a badge on a table: 54 | ``` 55 | curl -X DELETE https://{your metadata url}/table/{table key}/badge/{badge name}?category={badge category} 56 | ``` 57 | 58 | ## Adding badges through databuilder (and column level badges) 59 | 60 | To add badges using databuilder, you can use the [BadgeMetadata](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/models/badge.py) class and pass in the entity you want to create a badge relationship for. For an example of how this is done, search for badge in [TableMetadata](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/models/table_metadata.py) to see how we add badge nodes and relationships to neo4j. 61 | In [hive_table_metadata_extractor.py](https://github.com/amundsen-io/amundsendatabuilder/blob/8655338725bf279ea0332e5e6ab0592c8c7459ae/databuilder/extractor/hive_table_metadata_extractor.py#L106) you can see how the partition column is obtained and added to a column so the badge node can be created and related to the correct column. -------------------------------------------------------------------------------- /docs/issue_labeling.md: -------------------------------------------------------------------------------- 1 | # Issue and Feature Labeling 2 | > On Amundsen, we aim to be methodical in using issue labels, offering our community a way to understand what the issues are about and their status within our development process. 3 | 4 | We use a bunch of GitHub labels. They are a mix of custom labels and the default GitHub labels for open-source projects. We base these labels on four main types: **status labels**, **issue type labels**, **project labels**, and the **“other” category**. Read on to learn more about them.
5 | 6 | ## Status Labels 7 | * They show at a glance the status and progress of each issue 8 | * Prefixed with "Status:", followed by the label 9 | * Only *one status label* will be applied to any particular issue 10 | 11 | ### Labels 12 | - **Status: Needs Reproducing** – For bugs that need to be reproduced in order to get fixed 13 | - **Status: Review Needed** – Issue that needs review to be considered 14 | - **Status: Accepted** – Feature that we want to implement going forward 15 | - **Status: In Progress** – Issue that is being worked on right now. 16 | - **Status: Completed** – Issue is completed and on master 17 | - **Status: Abandoned** – Issue we won’t go ahead and implement, or that needs a “champion” to take it through 18 | - **Status: Blocked** – Issue blocked by any reason (dependencies, previous work, etc.) 19 | - **Status: On Hold** – Issue that is being considered but stopped due to lack of resources or changes in the roadmap 20 | 21 | Here is a diagram representing these states within the lifecycles: 22 |  23 | 24 | ## Type Labels 25 | * They show the type of the issue 26 | * Prefixed with "Type:", followed by the label 27 | 28 | ### Labels 29 | - **Type: Bug** – An unexpected problem or unintended behavior 30 | - **Type: Feature** – A new feature request 31 | - **Type: Maintenance** – A regular maintenance chore or task, including refactors, build system, CI, performance improvements 32 | - **Type: Documentation** – A documentation improvement task 33 | - **Type: Question** – An issue or PR that needs more information or a user question 34 | 35 | ## Project Labels 36 | * They indicate which project the issue refers to 37 | * Prefixed with "Project:", followed by the name of the project 38 | 39 | ### Labels 40 | - **Project: Common** – From amundsencommon 41 | - **Project: Databuilder** – From amundsendatabuilder 42 | - **Project: Frontend** – From amundsenfrontendlibrary 43 | - **Project: Metadata** – From amundsenmetadatalibrary 44 | - **Project: Search** – From amundsensearchlibrary 45 | - **Project: k8s** – Related to the Kubernetes helm chart 46 | - **Project: All** – Related to all the projects above 47 | 48 | ## Other Labels 49 | * Some of these are part of the standard GitHub labels and intended for OSS contributors 50 | * Some are related to the tools we use to maintain the library 51 | * They are not prefixed 52 | 53 | ### Labels 54 | - **help wanted** – Indicates we are looking for contributors on this issue 55 | - **good first issue** – Indicates the issue is a great one to tackle by newcomers to the project or OSS in general 56 | - **keep fresh** – Avoids getting the issue archived by our stale bot 57 | -------------------------------------------------------------------------------- /docs/tutorials/data-preview-with-superset.md: -------------------------------------------------------------------------------- 1 | # How to setup a preview client with Apache Superset 2 | 3 | In the previous [tutorial](./index-postgres.md), we talked about how to index the table metadata 4 | for a postgres database. In this tutorial, we will walk through how to configure data preview for this `films` table 5 | using Apache Superset. 6 | 7 | Amundsen provides an integration between Amundsen and BI Viz tool for data preview. It is not necessary to use Apache Superset 8 | as long as the BI Viz tool provides endpoint to do querying and get the results back from the BI tool. 
9 | [Apache Superset](https://superset.apache.org/) is an open-source business intelligence tool 10 | that can be used for data exploration and it is what we leverage internally at Lyft to support the feature. 11 | 12 | 1. Please setup Apache Superset following its official installation 13 | [guide](https://superset.apache.org/installation.html#superset-installation-and-initialization): 14 | ```bash 15 | # Install superset 16 | pip install apache-superset 17 | 18 | # Initialize the database 19 | superset db upgrade 20 | 21 | # Create an admin user (you will be prompted to set a username, first and last name before setting a password) 22 | $ export FLASK_APP=superset 23 | superset fab create-admin 24 | 25 | # Load some data to play with 26 | superset load_examples 27 | 28 | # Create default roles and permissions 29 | superset init 30 | 31 | # To start a development web server on port 8088, use -p to bind to another port 32 | superset run -p 8088 --with-threads --reload --debugger 33 | ``` 34 | 35 | Once setup properly, you could view the superset UI as following: 36 |  37 | 38 | 2. We need to add the postgres database to superset as the following: 39 |  40 | 41 | 3. We could verify the content of the `films` table using superset's sqlab feature: 42 |  43 | 44 | 4. Next, We need to build a preview client following this [guide](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/examples/superset_preview_client.md) 45 | and the [example client code](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/base/examples/example_superset_preview_client.py). 46 | There are a couple of things to keep in mind: 47 | - We could start with an unauthenticated Superset([example superset config](https://gist.github.com/feng-tao/b89e6faf7236372cef70a44f13615c39)), 48 | but in production, we will need to send the impersonate info to Superset 49 | to properly verify whether the given user could view the data. 50 | - When we build the client, we could need to configure the database id instead of the database name when send the request to superset. 51 | 52 | 5. Once we configure the preview client, put it in the frontend service entry point ([example](https://github.com/lyft/amundsenfrontendlibrary/blob/master/docs/configuration.md#python-entry-points)) and restart the frontend. 53 | 54 | 6. We could now view the preview data for the `films` table in Amundsen. 55 |  56 | From the above figure, the preview button on the table page is clickable. 57 | Once it clicked, you could see the actual data queried 58 | from Apache Superset: 59 |  60 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.neo4j.enabled }} 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: neo4j 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: neo4j 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | selector: 14 | matchLabels: 15 | app: {{ template "amundsen.name" . }} 16 | component: neo4j 17 | release: {{ .Release.Name }} 18 | replicas: 1 19 | template: 20 | metadata: 21 | {{- with default .Values.podAnnotations .Values.neo4j.podAnnotations }} 22 | annotations: 23 | {{ toYaml . | indent 8 }} 24 | {{- end }} 25 | labels: 26 | app: {{ template "amundsen.name" . 
}} 27 | component: neo4j 28 | release: {{ .Release.Name }} 29 | spec: 30 | {{- with .Values.neo4j.nodeSelector }} 31 | nodeSelector: 32 | {{ toYaml . | indent 8 }} 33 | {{- end }} 34 | {{- with .Values.neo4j.affinity }} 35 | affinity: 36 | {{ toYaml . | indent 8 }} 37 | {{- end }} 38 | {{- with .Values.neo4j.tolerations }} 39 | tolerations: 40 | {{ toYaml . | indent 8 }} 41 | {{- end }} 42 | initContainers: 43 | - name: init-neo4j-plugins 44 | image: "appropriate/curl:latest" 45 | imagePullPolicy: "IfNotPresent" 46 | command: 47 | - "/bin/sh" 48 | - "-c" 49 | - | 50 | curl -L https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/3.3.0.4/apoc-3.3.0.4-all.jar -O 51 | curl -L https://github.com/neo4j-contrib/neo4j-graph-algorithms/releases/download/3.3.5.0/graph-algorithms-algo-3.3.5.0.jar -O 52 | curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.11.250/aws-java-sdk-core-1.11.250.jar -O 53 | curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/1.11.250/aws-java-sdk-s3-1.11.250.jar -O 54 | curl -L https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.5.4/httpclient-4.5.4.jar -O 55 | curl -L https://repo1.maven.org/maven2/org/apache/httpcomponents/httpcore/4.4.8/httpcore-4.4.8.jar -O 56 | curl -L https://repo1.maven.org/maven2/joda-time/joda-time/2.9.9/joda-time-2.9.9.jar -O 57 | chmod 755 *.jar 58 | mv *.jar /var/lib/neo4j/plugins 59 | volumeMounts: 60 | - name: plugins 61 | mountPath: /var/lib/neo4j/plugins 62 | containers: 63 | - name: neo4j 64 | image: neo4j:{{ .Values.neo4j.version }} 65 | ports: 66 | - containerPort: 7474 67 | - containerPort: 7687 68 | - containerPort: 1337 69 | env: 70 | - name: NEO4J_CONF 71 | value: "/conf" 72 | volumeMounts: 73 | - name: conf 74 | mountPath: /conf 75 | {{- if .Values.neo4j.persistence }} 76 | - name: data 77 | mountPath: /var/lib/neo4j/data 78 | {{- end}} 79 | - name: plugins 80 | mountPath: /var/lib/neo4j/plugins 81 | {{- with .Values.neo4j.resources }} 82 | resources: 83 | {{ toYaml . | indent 10 }} 84 | {{- end}} 85 | volumes: 86 | - name: conf 87 | configMap: 88 | name: neo4j-configmap 89 | {{- if .Values.neo4j.persistence }} 90 | - name: data 91 | persistentVolumeClaim: 92 | claimName: neo4j-pvc 93 | {{- end}} 94 | - name: plugins 95 | emptyDir: {} 96 | {{ end }} 97 | -------------------------------------------------------------------------------- /MAINTAINING.md: -------------------------------------------------------------------------------- 1 | # Maintaining Amundsen 2 | 3 | As maintainers of the project, this is our guide. Most of the steps and guidelines 4 | in the [Contributing](CONTRIBUTING.md) document apply here, including how to set 5 | up your environment, write code to fit the code style, run tests, craft commits 6 | and manage branches. 7 | 8 | Beyond this, this document provides some details that would 9 | be too low-level for contributors. 10 | 11 | ## Table of Contents 12 | 13 | - [Communication](#communication) 14 | - [Managing the community](#managing-the-community) 15 | - [Workflow](#workflow) 16 | - [Architecture](#architecture) 17 | - [Updating the changelog](#updating-the-changelog) 18 | - [Documentation](#documentation) 19 | - [Labels](#labels) 20 | - [Adding new projects](#adding-new-projects) 21 | - [Related Documents](#related-documents) 22 | 23 | ## Communication 24 | 25 | We have several ways that we can communicate with each other: 26 | 27 | - To show our direction and next steps, the [**roadmap**][roadmap] is the best place. 
28 | - To track progress on the movement of issues, [**labels**](#labels) 29 | are useful. 30 | - To learn about what the community has been working lately, our [community meeting] is a great event. It happens the first Thursday of every month at 9AM PST, and you can watch past meeting recordings [here][cmeetingrecordings] 31 | - To chat with the maintainers team, get support or connect with Amundsen's community, join our Slack 32 | 33 | [roadmap]: https://www.amundsen.io/amundsen/roadmap/ 34 | [cmeeting]: meet.google.com/mqz-ndck-jmj 35 | [cmeetingrecordings]: https://www.youtube.com/channel/UCgOyzG0sEoolxuC9YXDYPeg 36 | [slack]: amundsenworkspace.slack.com 37 | 38 | ## Managing the community 39 | 40 | We try to create and foster a community around Amundsen. We do this by: 41 | 42 | - Answering questions from members of the community 43 | - Triaging Github issues, adding the proper [labels][labels] to new tickets 44 | - Closing stale issues and feature requests 45 | - Keeping the community informed by ensuring that we add communications regularly with the new features 46 | - Ensuring that the documentation, as well as the documentation site, is kept up to 47 | date 48 | - Doing code reviews for other maintainers and the community 49 | - Reviewing [RFCs][rfcs] and shaping the future of the project 50 | 51 | [labels]: https://github.com/amundsen-io/amundsen/labels 52 | [rfcs]: https://github.com/amundsen-io/rfcs 53 | 54 | ## Workflow 55 | 56 | We generally follow [GitHub Flow]. The `master` branch is the main line, and all 57 | branches are cut from and get merged back into this branch. Generally, the 58 | workflow is as follows: 59 | 60 | [github flow]: https://help.github.com/articles/github-flow/ 61 | 62 | - Cut a feature or bugfix branch from this branch 63 | - Upon completing a branch, create a PR and ask another maintainer to approve 64 | it 65 | - Try to keep the commit history as clean as possible. Before merging, squash 66 | "WIP" or related commits together and rebase as needed 67 | - Once your PR is approved, and you've cleaned up your branch, you're free to 68 | merge it in 69 | 70 | ## Architecture 71 | 72 | We have covered Amundsen's architecture in our [docs](https://lyft.github.io/amundsen/architecture/). 73 | 74 | ## Documentation 75 | 76 | We use [mkdocs] for creating our documentation from Markdown files. This system is configured from the 'mkdocs.yml' file in the root of this repository. 77 | 78 | Currently, our docs are built and deployed automatically with a GitHub action, so we shouldn't need to do anything. 79 | 80 | [mkdocs]: https://www.mkdocs.org/ 81 | 82 | ## Labels 83 | 84 | We've found labels to be useful for cataloging and marking progress on features and bugs. You can read about our labels on the [issue_labeling](https://lyft.github.io/amundsen/issue_labeling/) document. 85 | 86 | ## Adding new projects 87 | 88 | To add new projects to the amundsen-io organization, we will first discuss it through a GitHub issue. Once we discuss it thoroughly (~3-5 business days, depending on the volume of conversation), the maintainers will decide whether the new project should be added. 
89 | 90 | ## Related Documents 91 | 92 | - [Contributing Guide](https://www.amundsen.io/amundsen/CONTRIBUTING/) 93 | - [Governance Document](https://github.com/amundsen-io/amundsen/blob/master/GOVERNANCE.md) 94 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Amundsen 2 | repo_name: Amundsen 3 | repo_url: https://github.com/amundsen-io/amundsen 4 | site_description: "Amundsen is a metadata driven application for improving the productivity of data analysts, data scientists and engineers when interacting with data." 5 | site_author: Amundsen Project Authors. 6 | site_url: https://www.amundsen.io/amundsen/ 7 | remote_branch: gh-pages 8 | 9 | copyright: 'Copyright © 2018-2020 Amundsen Project Authors.' 10 | 11 | theme: 12 | name: 'material' 13 | logo: img/logos/amundsen_mark_orange.svg 14 | favicon: 'img/logos/amundsen_mark_orange.svg' 15 | palette: 16 | primary: '#2B1B81' 17 | accent: '#2B1B81' 18 | feature: 19 | tabs: true 20 | 21 | -extra_css: 22 | - 'css/app.css' 23 | 24 | 25 | markdown_extensions: 26 | - admonition 27 | - smarty 28 | - codehilite: 29 | guess_lang: false 30 | linenums: True 31 | - footnotes 32 | - meta 33 | - toc: 34 | permalink: true 35 | - pymdownx.betterem: 36 | smart_enable: all 37 | - pymdownx.caret 38 | - pymdownx.details 39 | - pymdownx.inlinehilite 40 | - pymdownx.magiclink 41 | - pymdownx.smartsymbols 42 | - pymdownx.superfences 43 | - tables 44 | 45 | 46 | extra: 47 | # type is the name of the FontAwesome icon without the fa- prefix. 48 | social: 49 | - type: globe 50 | link: https://www.amundsen.io/ 51 | - type: github-alt 52 | link: https://github.com/amundsen-io 53 | - type: twitter 54 | link: https://twitter.com/amundsenio 55 | - type: linkedin 56 | link: https://www.linkedin.com/company/the-linux-foundation/ 57 | 58 | nav: 59 | - 'Overview': index.md 60 | - 'Architecture': architecture.md 61 | - 'Developer Guide': 62 | - 'Overview': developer_guide.md 63 | - 'Issue and Feature Labeling': issue_labeling.md 64 | - 'Contributing Guide': CONTRIBUTING.md 65 | - 'User Guide': 66 | - 'Quick Start': 'installation.md' 67 | - 'Tutorials': 68 | - 'How to index metadata for real life databases': 'tutorials/index-postgres.md' 69 | - 'How to setup a preview client with Apache Superset': 'tutorials/data-preview-with-superset.md' 70 | - 'How to setup user profiles': 'tutorials/user-profiles.md' 71 | - 'How to ingest Dashboard': 'databuilder/docs/dashboard_ingestion_guide.md' 72 | - 'How to track user metric for Amundsen': 'tutorials/how-to-track-user-metric.md' 73 | - 'How to add table level and column level badges': 'tutorials/badges.md' 74 | - 'How to search Amundsen effectively': 'tutorials/how-to-search-effective.md' 75 | - 'Deployment': 76 | - 'Authentication': 'authentication/oidc.md' 77 | - 'AWS ECS Installation': 'installation-aws-ecs/aws-ecs-deployment.md' 78 | - 'K8S Installation': 'k8s_install.md' 79 | - 'Components': 80 | - 'Frontend': 81 | - 'Overview': 'frontend/README.md' 82 | - 'Configuration': 83 | - 'Application Config': 'frontend/docs/application_config.md' 84 | - 'React Configuration': 'frontend/docs/configuration.md' 85 | - 'Flask Configuration': 'frontend/docs/flask_config.md' 86 | - 'Preview Client Setup': 'frontend/docs/examples/superset_preview_client.md' 87 | - 'FE Developer Guide': 'frontend/docs/developer_guide.md' 88 | - 'FE Installation Guide': 'frontend/docs/installation.md' 89 | - 'Recommended 
Practices': 'frontend/docs/recommended_practices.md' 90 | - 'Search': 91 | - 'Overview': 'search/README.md' 92 | - 'Proxy': 93 | - 'Atlas Backend': 'search/docs/atlas-search.md' 94 | - 'Metadata': 95 | - 'Overview': 'metadata/README.md' 96 | - 'Configuration': 97 | - 'Overview': 'metadata/docs/configurations.md' 98 | - 'Metadata API Structure': 'metadata/docs/structure.md' 99 | - 'Proxy': 100 | - 'Atlas Backend': 101 | - 'Overview': 'metadata/docs/proxy/atlas_proxy.md' 102 | - 'Popular Table': 'metadata/docs/proxy/atlas/popular_tables.md' 103 | - 'Gremlin Backend': 'metadata/docs/proxy/gremlin.md' 104 | - 'Neptune Backend': 'metadata/docs/proxy/neptune.md' 105 | - 'Databuilder': 106 | - 'Overview': 'databuilder/README.md' 107 | - 'Models': 'databuilder/docs/models.md' 108 | - 'Dashboard Ingestion guide': 'databuilder/docs/dashboard_ingestion_guide.md' 109 | - 'Common': 110 | - 'Overview': 'common/README.md' 111 | - 'FAQ': faq.md 112 | - 'Roadmap': roadmap.md 113 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-frontend.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ template "amundsen.fullname" . }}-{{ .Values.frontEnd.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: {{ .Values.frontEnd.serviceName }} 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | selector: 14 | matchLabels: 15 | app: {{ template "amundsen.name" . }} 16 | component: {{ .Values.frontEnd.serviceName }} 17 | release: {{ .Release.Name }} 18 | replicas: {{ default 1 .Values.frontEnd.replicas }} 19 | template: 20 | metadata: 21 | {{- with default .Values.podAnnotations .Values.frontEnd.podAnnotations }} 22 | annotations: 23 | {{ toYaml . | indent 8 }} 24 | {{- end }} 25 | labels: 26 | app: {{ template "amundsen.name" . }} 27 | component: {{ .Values.frontEnd.serviceName }} 28 | release: {{ .Release.Name }} 29 | spec: 30 | {{- with default .Values.nodeSelector .Values.frontEnd.nodeSelector }} 31 | nodeSelector: 32 | {{ toYaml . | indent 8 }} 33 | {{- end }} 34 | {{- with default .Values.affinity .Values.frontEnd.affinity }} 35 | affinity: 36 | {{ toYaml . | indent 8 }} 37 | {{- end }} 38 | {{- with default .Values.tolerations .Values.frontEnd.tolerations }} 39 | tolerations: 40 | {{ toYaml . 
| indent 8 }} 41 | {{- end }} 42 | volumes: 43 | {{- if .Values.frontEnd.oidcEnabled }} 44 | - name: oidc-config 45 | secret: 46 | secretName: oidc-config 47 | {{- end }} 48 | containers: 49 | - name: {{ .Chart.Name }}-{{ .Values.frontEnd.serviceName }} 50 | image: {{ .Values.frontEnd.image }}:{{ .Values.frontEnd.imageTag }} 51 | imagePullPolicy: Always 52 | ports: 53 | - containerPort: 5000 54 | env: 55 | - name: FRONTEND_BASE 56 | value: {{ .Values.frontEnd.baseUrl }} 57 | - name: SEARCHSERVICE_BASE 58 | value: http://{{ .Chart.Name }}-{{ .Values.search.serviceName }}:5001 59 | - name: METADATASERVICE_BASE 60 | value: http://{{ .Chart.Name }}-{{ .Values.metadata.serviceName }}:5002 61 | - name: LONG_RANDOM_STRING 62 | value: {{ quote .Values.LONG_RANDOM_STRING }} 63 | {{- if .Values.frontEnd.oidcEnabled }} 64 | - name: FRONTEND_SVC_CONFIG_MODULE_CLASS 65 | value: amundsen_application.oidc_config.OidcConfig 66 | - name: FLASK_OIDC_WHITELISTED_ENDPOINTS 67 | value: status,healthcheck,health,logout 68 | - name: SQLALCHEMY_DATABASE_URI 69 | value: sqlite:///sessions.db 70 | - name: APP_WRAPPER 71 | value: flaskoidc 72 | - name: APP_WRAPPER_CLASS 73 | value: FlaskOIDC 74 | - name: OIDC_CLIENT_SECRETS 75 | value: /etc/client_secrets.json 76 | {{- if .Values.frontEnd.OVERWRITE_REDIRECT_URI }} 77 | - name: OVERWRITE_REDIRECT_URI 78 | value: {{ .Values.frontEnd.OVERWRITE_REDIRECT_URI }} 79 | {{- end }} 80 | - name: OIDC_SECRET_KEY 81 | valueFrom: 82 | secretKeyRef: 83 | name: oidc-config 84 | key: OIDC_CLIENT_SECRET 85 | {{- end }} 86 | command: ["gunicorn"] 87 | args: ['-w', '4', '--bind', ':5000', 'amundsen_application.wsgi'] 88 | readinessProbe: 89 | httpGet: 90 | path: "/healthcheck" 91 | port: 5000 92 | initialDelaySeconds: 10 93 | periodSeconds: 60 94 | timeoutSeconds: 1 95 | successThreshold: 1 96 | failureThreshold: 5 97 | livenessProbe: 98 | httpGet: 99 | path: "/healthcheck" 100 | port: 5000 101 | initialDelaySeconds: 10 102 | periodSeconds: 60 103 | timeoutSeconds: 1 104 | successThreshold: 1 105 | failureThreshold: 5 106 | volumeMounts: 107 | {{- if .Values.frontEnd.oidcEnabled }} 108 | - name: oidc-config 109 | mountPath: /etc/client_secrets.json 110 | subPath: client_secrets.json 111 | {{- end }} 112 | {{- with .Values.metadata.resources }} 113 | resources: 114 | {{ toYaml . | indent 10 }} 115 | {{- end }} 116 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## How to select between Neo4j and Atlas as backend for Amundsen? 4 | 5 | ### Why Neo4j? 6 | 1. Amundsen has direct influence over the data model if you use neo4j. This, at least initially, will benefit the speed by which new features in amundsen can arrive 7 | 2. Atlas is developed with data governance in mind and not with data discovery. You could view "slapping amundsen on top of Atlas" as a kind of Frankenstein: never able to properly able to cater to your audience 8 | 3. Atlas seems to have a slow development cycle and it's community is not very responsive although some small improvements have been made 9 | 4. Atlas has the "Hadoop" era "smell" which isn't considered very sexy nowadays 10 | 5. Neo4j for it is the market leader in Graph database and also was proven by Airbnb’s Data portal on their Data discovery tool. 11 | 12 | ### Why Atlas? 13 | 1. Atlas has lineage support already available. It's been tried and tested. 14 | 2. Tag propagation is supported 15 | 3. 
It has a robust authentication and authorization system 16 | 4. Atlas does data governance; adding Amundsen for discovery makes it the best of both worlds 17 | 5. It has support for push-based ingestion due to its many plugins 18 | 6. The free version of Neo4j does not have authorization support (the Enterprise version does). Your question should actually be why use "neo4j over janusgraph", because that is the right level of comparison. Atlas adds a whole bunch on top of the graph database. 19 | 20 | ## What are the prerequisites to use Apache Atlas as backend for Amundsen? 21 | To run Amundsen with Atlas, the latest versions of the following components should be used: 22 | 1. [Apache Atlas](https://github.com/apache/atlas/) - built from `master` branch. Ref [`103e867cc126ddb84e64bf262791a01a55bee6e5`](https://github.com/apache/atlas/commit/103e867cc126ddb84e64bf262791a01a55bee6e5) (or higher). 23 | 2. [amundsenatlastypes](https://pypi.org/project/amundsenatlastypes/) - library for installing Atlas entity definitions specific to Amundsen integration. Version `1.1.0` (or higher). 24 | 25 | ## How to migrate from Amundsen 1.x -> 2.x? 26 | 27 | v2.0 renames a handful of fields in the services to be more consistent. Unfortunately, one side effect is that the 2.0 versions of the services will need to be deployed simultaneously, as they are not interoperable with the 1.x versions. 28 | 29 | Additionally, some indexed field names in the Elasticsearch document change as well, so if you're using Elasticsearch, you'll need to republish the Elasticsearch index via a Databuilder job. 30 | 31 | The data in the metadata store, however, can be preserved when migrating from 1.x to 2.0. 32 | 33 | A v2.0 deployment consists of deploying all three services along with republishing the Elasticsearch table documents with the v2.0 Databuilder. 34 | 35 | Keep in mind there is likely to be some downtime with v2.0.0 between deploying the 3 services and re-seeding the Elasticsearch indexes, so it might be ideal to stage the rollout by datacenter/environment if uptime is key. 36 | 37 | ## How to avoid certain metadata in Amundsen being erased by databuilder ingestion? 38 | 39 | By default, databuilder always upserts the metadata. If you want to prevent that from happening for certain types of metadata, you could add the following 40 | config to your databuilder job's config: 41 | 42 | ```python 43 | 'publisher.neo4j.{}'.format(neo4j_csv_publisher.NEO4J_CREATE_ONLY_NODES): [DESCRIPTION_NODE_LABEL], 44 | ``` 45 | 46 | This config means that databuilder will only update the table / column description if it doesn't exist yet, for example when the table is newly created. 47 | This is useful when we treat the Amundsen graph as the source of truth for certain types of metadata (e.g. description). 48 | 49 | ## How to capture all Google Analytics? 50 | 51 | Users are likely to have some sort of adblocker installed, making your Google Analytics less accurate. 52 | 53 | To put a proxy in place to bypass any adblockers and capture all analytics, follow these steps: 54 | 55 | 1. Follow https://github.com/ZitRos/save-analytics-from-content-blockers#setup to set up your own proxy server. 56 | 2. In the same repository, run `npm run mask www.googletagmanager.com/gtag/js?id=UA-XXXXXXXXX` and save the output. 57 | 3. In your custom frontend, override https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/static/templates/fragments/google-analytics-loader.html#L6 to load the analytics script from your masked proxy endpoint instead. 58 | 4.
Now, note that network requests to www.googletagmanager.com will be sent from behind your masked proxy endpoint, saving your analytics from content blockers! -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Bootstrap a default version of Amundsen using Docker 4 | The following instructions are for setting up a version of Amundsen using Docker. 5 | 6 | 1. Make sure you have at least 3GB available to Docker. Install `docker` and `docker-compose`. 7 | 2. Clone [this repo](https://github.com/amundsen-io/amundsen) and its submodules by running: 8 | ```bash 9 | $ git clone --recursive git@github.com:amundsen-io/amundsen.git 10 | ``` 11 | 3. Enter the cloned directory and run: 12 | ```bash 13 | # For Neo4j Backend 14 | $ docker-compose -f docker-amundsen.yml up 15 | 16 | # For Atlas 17 | $ docker-compose -f docker-amundsen-atlas.yml up 18 | ``` 19 | 4. Ingest the provided sample data into Neo4j by doing the following: _(Please skip if you are using the Atlas backend)_ 20 | 21 | * In a separate terminal window, change directory to the [amundsendatabuilder](https://github.com/amundsen-io/amundsendatabuilder) submodule. 22 | * The `sample_data_loader` python script included in the `examples/` directory uses the _elasticsearch client_, _pyhocon_ and other libraries. Install the dependencies in a virtual env and run the script by following the commands below: 23 | ```bash 24 | $ python3 -m venv venv 25 | $ source venv/bin/activate 26 | $ pip3 install -r requirements.txt 27 | $ python3 setup.py install 28 | $ python3 example/scripts/sample_data_loader.py 29 | ``` 30 | 5. View the UI at [`http://localhost:5000`](http://localhost:5000) and try searching for `test`; it should return some results. 31 |  32 | 33 | 6. We could also do an exact-match search for a table entity. For example: search `test_table1` in the table field and 34 | it returns the records that matched. 35 |  36 | 37 | **Atlas Note:** Atlas takes some time to boot properly. So you may not be able to see the results immediately 38 | after the `docker-compose up` command. 39 | Atlas is ready once you see the following line in the docker output: `Amundsen Entity Definitions Created...` 40 | 41 | ### Verify setup 42 | 43 | 1. You can verify dummy data has been ingested into Neo4j by visiting [`http://localhost:7474/browser/`](http://localhost:7474/browser/) and running `MATCH (n:Table) RETURN n LIMIT 25` in the query box. You should see two tables: 44 | 1. `hive.test_schema.test_table1` 45 | 2. `hive.test_schema.test_table2` 46 |  47 | 2. You can verify the data has been loaded into the metadataservice by visiting: 48 | 1. [`http://localhost:5000/table_detail/gold/hive/test_schema/test_table1`](http://localhost:5000/table_detail/gold/hive/test_schema/test_table1) 49 | 2. [`http://localhost:5000/table_detail/gold/dynamo/test_schema/test_table2`](http://localhost:5000/table_detail/gold/dynamo/test_schema/test_table2) 50 | 51 | ### Troubleshooting 52 | 53 | 1. If the docker container doesn't have enough heap memory for Elasticsearch, `es_amundsen` will fail during `docker-compose`. 54 | 1. docker-compose error: `es_amundsen | [1]: max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144]` 55 | 2. Increase the heap memory [detailed instructions here](https://www.elastic.co/guide/en/elasticsearch/reference/7.1/docker.html#docker-cli-run-prod-mode) 56 | 1.
Edit `/etc/sysctl.conf` 57 | 2. Make entry `vm.max_map_count=262144`. Save and exit. 58 | 3. Reload settings `$ sysctl -p` 59 | 4. Restart `docker-compose` 60 | 61 | 2. If `docker-amundsen-local.yml` stops because of `org.elasticsearch.bootstrap.StartupException: java.lang.IllegalStateException: Failed to create node environment`, then `es_amundsen` [cannot write](https://discuss.elastic.co/t/elastic-elasticsearch-docker-not-assigning-permissions-to-data-directory-on-run/65812/4) to `.local/elasticsearch`. 62 | 1. `chown -R 1000:1000 .local/elasticsearch` 63 | 2. Restart `docker-compose` 64 | 3. If, when running the sample data loader, you receive a connection error related to Elasticsearch, or one like this for Neo4j: 65 | ``` 66 | Traceback (most recent call last): 67 | File "/home/ubuntu/amundsen/amundsendatabuilder/venv/lib/python3.6/site-packages/neobolt/direct.py", line 831, in _connect 68 | s.connect(resolved_address) 69 | ConnectionRefusedError: [Errno 111] Connection refused 70 | ``` 71 | 4. If the `elastic search` container stops with an error `max file descriptors [4096] for elasticsearch process is too low, increase to at least [65535]`, then add the below code to the `elasticsearch` definition in the file `docker-amundsen-local.yml`. 72 | ``` 73 | ulimits: 74 | nofile: 75 | soft: 65535 76 | hard: 65535 77 | ``` 78 | Then check whether all 5 Amundsen-related containers are running with `docker ps`. Can you connect to the Neo4j UI at http://localhost:7474/browser/ and, similarly, the raw ES API at http://localhost:9200? Do the Docker logs reveal any serious issues? -------------------------------------------------------------------------------- /docs/roadmap.md: -------------------------------------------------------------------------------- 1 | # Amundsen Roadmap 2 | The following roadmap gives an overview of what we are currently working on and what we want to tackle next. This helps potential contributors understand the current status of the project and where it's going next, as well as giving them a chance to be part of the planning. 3 | 4 | ## Amundsen Mission 5 | > *To organize all information about data and make it universally actionable* 6 | 7 | ## Vision for 2020 8 | > *Centralize a comprehensive and actionable map of all our data resources that can be leveraged to solve a growing number of use cases and workflows* 9 | 10 | ## Short Term - Our Current focus 11 | 12 | #### Provide rich metadata to make data trustworthy 13 | *What*: Enrich the table detail page with additional structured metadata / programmatic descriptions. 14 | 15 | *Status*: tech spec WIP 16 | 17 | #### Native lineage integration 18 | *What*: We want to create a native lineage integration in Amundsen, to better surface how data assets interact with each other. 19 | 20 | *Status*: tech spec out 21 | 22 | #### Integrate with Data Quality system 23 | *What*: Integrate with different data quality systems to provide a quality score. 24 | 25 | *Status*: planning 26 | 27 | ## Mid Term - Our Next steps 28 | #### Improve search ranking 29 | *What*: Update search ranking to be informed by "badges" that may exist on data sets, e.g. deprecated, etc. 30 | 31 | *Status*: planning 32 | 33 | #### Notifications when a table evolves 34 | *What*: Notify users in Amundsen (akin to Facebook notifications or similar) when a table evolves. Owners of data and consumers of data will likely need to be notified of different things.
35 | 36 | *Status*: planning has not started 37 | 38 | #### Commonly joined tables / browsing the data model 39 | *What*: As a data user, I would like to see commonly joined tables and how to join them. 40 | One option would be to show commonly joined tables along with example join queries. Another option would be to provide a navigational experience for the data model, showing foreign keys and which tables they come from. 41 | 42 | *Status*: planning has not started 43 | 44 | #### Curated navigation experience 45 | *What*: Currently, Amundsen's experience is very focussed on search. However, especially for new users, an experience where they are able to navigate through the data hierarchy is very important. This item proposes to revamp the navigational experience in Amundsen (currently barebones - based on tags) to do justice to the users' need to browse through data sets when they don't even know what to search for. 46 | 47 | *Status*: planning 48 | 49 | #### Push ingest API 50 | *What*: We want to create a push API so that it is as easy as possible for a new data resource type to be ingested 51 | 52 | *Status*: implementation has started (around 80% complete) 53 | 54 | #### GET Rest API 55 | *What*: enable users to access our data map programmatically through a Rest API 56 | 57 | *Status*: implementation has started 58 | 59 | #### Granular Access Control 60 | *What*: we want to have more granular control of access. For example, only certain types of people would be able to see certain types of metadata/functionality 61 | 62 | *Status*: implementation has not started 63 | 64 | #### Show distinct column values 65 | *What*: When a column has a limited set of possible values, we want to make them easily discoverable 66 | 67 | *Status*: implementation has not started 68 | 69 | #### “Order by” for columns 70 | *What*: we want to help users make sense of which columns people use in the tables we index. Within a frequently used table, a column might not be used anymore because it is known to be deprecated 71 | 72 | *Status*: implementation has not started 73 | 74 | #### Versioning system 75 | *What*: We want to create a versioning system for our indexed resources, to be able to index different versions of the same resource. This is especially required for machine learning purposes. 76 | 77 | *Status*: implementation has not started 78 | 79 | #### Index Processes 80 | *What*: we want to index ETLs and pipelines from our Machine Learning Engine 81 | 82 | *Status*: implementation has not started 83 | 84 | #### Index Teams 85 | *What*: We want to add teams pages to enable users to see the important tables and dashboards a team uses 86 | 87 | *Status*: implementation has not started 88 | 89 | #### Index Services 90 | *What*: With our microservices architecture, we want to index services and show how these services interact with data artifacts 91 | 92 | *Status*: implementation has not started 93 | 94 | #### Index S3 buckets 95 | *What*: add these new resource types to our data map and create resource pages for them 96 | 97 | *Status*: implementation has not started 98 | 99 | #### Index Pub/Sub systems 100 | *What*: We want to make our pub/sub systems discoverable 101 | 102 | *Status*: implementation has not started 103 | 104 | ## How to Get Involved 105 | Let us know in the [Slack channel](https://app.slack.com/client/TGFR0CZM3/CGFBVT23V) if you are interested in taking a stab at leading the development of one of these features.
106 | 107 | You can also jump right in by tackling one of our issues labeled as ['help wanted'](https://github.com/amundsen-io/amundsen/labels/help%20wanted) or, if you are new to Amundsen, try one of our ['good first issue'](https://github.com/amundsen-io/amundsen/labels/good%20first%20issue) tickets. 108 | -------------------------------------------------------------------------------- /docs/tutorials/how-to-track-user-metric.md: -------------------------------------------------------------------------------- 1 | # How to track Amundsen user metric 2 | 3 | After you have deployed Amundsen into production, you want to track how user interacts with Amundsen for various reasons. 4 | 5 | The easier way is to leverage [Google Analytics](https://analytics.google.com/analytics/web/) for basic user tracking. You could first 6 | get the analytics token for your domain and put it as the [frontend config](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/54de01bdc574665316f0517aefbd55cf7ca37ef0/amundsen_application/static/js/config/config-default.ts#L22) 7 | 8 | 9 | Besides implementing Google Analytics, we provide a way called `action_logging` to do fine grained user action tracking. 10 | The `action_logging` is a decorator to allow you to integrate user info and pipe it to your inhouse event tracking system(e.g Kafka). 11 | 12 | You need to put the custom method into entry_points following this 13 | [example](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/54de01bdc574665316f0517aefbd55cf7ca37ef0/docs/configuration.md#action-logging). 14 | 15 | And here is the IDL proto we used at Lyft to send the event message: 16 | ```bash 17 | message UserAction { 18 | // Sending host name 19 | google.protobuf.StringValue host_name = 1; 20 | // start time in epoch ms 21 | google.protobuf.Int64Value start_epoch_ms = 2; 22 | // end time in epoch ms 23 | google.protobuf.Int64Value end_epoch_ms = 3; 24 | // json array contains positional arguments 25 | common.LongString pos_args_json = 4; 26 | // json object contains key word arguments 27 | common.LongString keyword_args_json = 5; 28 | // json object contains output of command 29 | common.LongString output = 6; 30 | // an error message or exception stacktrace 31 | common.LongString error = 7; 32 | // `user` 33 | google.protobuf.StringValue user = 8; 34 | } 35 | ``` 36 | 37 | It matches the action log model defined in [here](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/ccfd2d6b82957fef347e956b243e4048c191fc0d/amundsen_application/log/action_log_model.py). 38 | 39 | Once you have the event in your data warehouse, you could start building different KPI user metric: 40 | 41 | 1. WAU 42 | 43 | Sample query if the event table named as `default.event_amundsenfrontend_user_action` 44 | ``` 45 | SELECT date_trunc('week', CAST("ds" AS TIMESTAMP)) AS "__timestamp", 46 | COUNT(DISTINCT user_value) AS "count_distinct_active_users" 47 | FROM 48 | (SELECT * 49 | FROM default.event_amundsenfrontend_user_action 50 | WHERE ds > '2019-09-01') AS "expr_qry" 51 | WHERE "ds" >= '2020-04-21 00:00:00.000000' 52 | AND "ds" <= '2020-10-21 05:31:14.000000' 53 | GROUP BY date_trunc('week', CAST("ds" AS TIMESTAMP)) 54 | ORDER BY "count_distinct_active_users" DESC 55 | LIMIT 10000 56 | ``` 57 | 58 | 2. 
DAU 59 | 60 | Sample query if the event table named as `default.event_amundsenfrontend_user_action` 61 | ``` 62 | SELECT date_trunc('day', CAST("ds" AS TIMESTAMP)) AS "__timestamp", 63 | COUNT(DISTINCT user_value) AS "count_distinct_active_users" 64 | FROM 65 | (SELECT * 66 | FROM default.event_amundsenfrontend_user_action 67 | WHERE ds > '2019-09-01') AS "expr_qry" 68 | WHERE "ds" >= '2020-07-21 00:00:00.000000' 69 | AND "ds" <= '2020-10-21 00:00:00.000000' 70 | GROUP BY date_trunc('day', CAST("ds" AS TIMESTAMP)) 71 | ORDER BY "count_distinct_active_users" DESC 72 | LIMIT 50000 73 | ``` 74 | 75 | You could also exclude weekends: 76 | ``` 77 | SELECT date_trunc('day', CAST("ds" AS TIMESTAMP)) AS "__timestamp", 78 | COUNT(DISTINCT user_value) AS "count_distinct_active_users" 79 | FROM 80 | (SELECT * 81 | FROM default.event_amundsenfrontend_user_action 82 | WHERE ds > '2019-09-01') AS "expr_qry" 83 | WHERE "ds" >= '2020-04-21 00:00:00.000000' 84 | AND "ds" <= '2020-10-21 05:33:11.000000' 85 | AND day_of_week(logged_at) NOT IN (6, 86 | 7) 87 | GROUP BY date_trunc('day', CAST("ds" AS TIMESTAMP)) 88 | ORDER BY "count_distinct_active_users" DESC 89 | LIMIT 50000 90 | ``` 91 | 92 | 3. User Penetration per role 93 | 94 | Sample query if the event table named as `default.event_amundsenfrontend_user_action` and a table for user: 95 | ``` 96 | SELECT "title" AS "title", 97 | COUNT(DISTINCT email) * 100 / MAX(role_count) AS "penetration_percent" 98 | FROM 99 | (SELECT e.occurred_at, 100 | u.email, 101 | u.title, 102 | tmp.role_count 103 | FROM default.family_user u 104 | JOIN default.event_amundsenfrontend_user_action e ON u.email = e.user_value 105 | JOIN 106 | (SELECT title, 107 | count(*) role_count 108 | FROM default.family_user 109 | GROUP BY 1) as tmp ON u.title = tmp.title 110 | where ds is not NULL) AS "expr_qry" 111 | WHERE "occurred_at" >= from_iso8601_timestamp('2020-10-14T00:00:00.000000') 112 | AND "occurred_at" <= from_iso8601_timestamp('2020-10-21T00:00:00.000000') 113 | AND "role_count" > 20 114 | GROUP BY "title" 115 | ORDER BY "penetration_percent" DESC 116 | LIMIT 100 117 | ``` 118 | 119 | 4. Usage breakdown per role_count 120 | 121 | sample query: 122 | ``` 123 | SELECT "title" AS "title", 124 | count("email") AS "COUNT(email)" 125 | FROM 126 | (SELECT e.occurred_at, 127 | u.email, 128 | u.title, 129 | tmp.role_count 130 | FROM default.family_user u 131 | JOIN default.event_amundsenfrontend_user_action e ON u.email = e.user_value 132 | JOIN 133 | (SELECT title, 134 | count(*) role_count 135 | FROM default.family_user 136 | GROUP BY 1) as tmp ON u.title = tmp.title 137 | where ds is not NULL) AS "expr_qry" 138 | WHERE "occurred_at" >= from_iso8601_timestamp('2020-10-14T00:00:00.000000') 139 | AND "occurred_at" <= from_iso8601_timestamp('2020-10-21T00:00:00.000000') 140 | GROUP BY "title" 141 | ORDER BY "COUNT(email)" DESC 142 | LIMIT 15 143 | ``` 144 | 145 | 5. 
Search click-through rate
146 | 
147 | Sample query:
148 | ```
149 | SELECT date_trunc('day', CAST("occurred_at" AS TIMESTAMP)) AS "__timestamp",
150 |        SUM(CASE
151 |                WHEN CAST(json_extract_scalar(keyword_args_json, '$.index') AS BIGINT) <= 3 THEN 1
152 |                ELSE 0
153 |            END) * 100 / COUNT(*) AS "click_through_rate"
154 | FROM
155 |   (SELECT *
156 |    FROM default.event_amundsenfrontend_user_action
157 |    WHERE ds > '2019-09-01') AS "expr_qry"
158 | WHERE "occurred_at" >= from_iso8601_timestamp('2020-09-21T00:00:00.000000')
159 |   AND "occurred_at" <= from_iso8601_timestamp('2020-10-21T00:00:00.000000')
160 |   AND "command" IN ('_get_table_metadata',
161 |                     '_get_dashboard_metadata',
162 |                     '_log_get_user')
163 |   AND json_extract_scalar(keyword_args_json, '$.source') IN ('search_results',
164 |                                                              'inline_search')
165 | GROUP BY date_trunc('day', CAST("occurred_at" AS TIMESTAMP))
166 | ORDER BY "click_through_rate" DESC
167 | LIMIT 10000
168 | ```
169 | 
170 | 6. Top 50 active users
171 | 
172 | 7. Top search terms
173 | 
174 | 8. Most popular tables
175 | 
176 | 9. Search click index
177 | 
178 | 10. Metadata edits
179 | 
180 | 11. Metadata edit leaders
181 | 
182 | 12. Amundsen users per role (by joining with employee data)
183 | 
184 | 13. ...
185 | -------------------------------------------------------------------------------- /docs/tutorials/how-to-search-effective.md: --------------------------------------------------------------------------------
1 | # How to search Amundsen effectively
2 | 
3 | The goal of this tutorial is to provide a few tips on how to search for datasets effectively in Amundsen.
4 | 
5 | ## Overview
6 | 
7 | Amundsen currently indexes three types of entities: tables, people, and dashboards. This tutorial mostly covers how to search for a table entity effectively.
8 | 
9 | We will cover other entities in the future.
10 | 
11 | ## General Search
12 | 
13 | Once users are on the Amundsen home page, they can search for anything in the search bar. In the backend, the search system takes the user's query term, searches across the three entities (tables, people, and dashboards), and returns the results with the highest ranking. For table search, it searches across different fields, including table name, schema name, table or column descriptions, tags, etc. Amundsen also supports typeahead search, which queries the backend as soon as users enter new characters.
14 | 
15 | 
16 | Tips:
17 | 
18 | - If you know the full table name (e.g. schema.table), try to search with that full table name, which will generally return it as the top result.
19 | - If you are unsure of the table name, search with `word1 word2` (with a space in between). For example, if your table's name is `test.test_rides` but you only know that the table name includes test and rides, search with `test rides` (space in between). In this case, Amundsen will return tables that match either test or rides and union the results together based on the search algorithm ranking.
20 | - If you know your table name but don't know its schema, you could search with `word1_word2`. For example, if you know your table name is `test_rides`, search with `test_rides`, which will only return tables matching that given name.
21 | 
22 | ## Advanced Search
23 | 
24 | If you want to do traditional faceted search, which allows users to apply multiple filters, you could try out the advanced search.
Currently, only the table entity is supported by the advanced search, but we plan to add support for the dashboard entity in the near future.
25 | 
26 | You can use wildcards in the search boxes as well. In the example above, the user puts `rides*` in the table box. This will search across all the tables that have `rides*` as the table name in the different databases, including bigquery/druid/hive/presto/rs, etc.
27 | 
28 | If you want to narrow down the search results, you can add more filters. In the example above, the user searches for a table name matching `rides*` that has beta as a badge. Once the search finishes, you can see that only one table matches the criteria (test.rides in this case).
29 | 
30 | ## Search Ranking Algorithm Demystified
31 | 
32 | Currently, Amundsen provides the same search ranking for all the different personas. It ranks tables based on the query count in the Presto query log from the past 90 days at Lyft. It could be different based on your company's setup.
33 | 
34 | ## Try out different search heuristics
35 | 
36 | You can always try out different search heuristics using the Kibana Dev Tools.
37 | 
38 | For example, for tables you could use:
39 | ```
40 | GET table_search_index/_search
41 | {
42 |   "query": {
43 |     "function_score": {
44 |       "query": {
45 |         "multi_match": {
46 |           "query": "$term",
47 |           "fields": ["display_name^1000",
48 |                      "name.raw^75",
49 |                      "name^5",
50 |                      "schema^3",
51 |                      "description^3",
52 |                      "column_names^2",
53 |                      "column_descriptions",
54 |                      "tags",
55 |                      "badges",
56 |                      "programmatic_descriptions"]
57 |         }
58 |       },
59 |       "field_value_factor": {
60 |         "field": "total_usage",
61 |         "modifier": "log2p"
62 |       }
63 |     }
64 |   }
65 | }
66 | ```
67 | 
68 | The results will be weighted based on total usage. It is equivalent to the following using a Painless script:
69 | ```
70 | "function_score": {
71 |   "query": {
72 |     "multi_match": {
73 |       "query": query_term,
74 |       "fields": ["display_name^1000",
75 |                  "name.raw^75",
76 |                  "name^5",
77 |                  "schema^3",
78 |                  "description^3",
79 |                  "column_names^2",
80 |                  "column_descriptions",
81 |                  "tags",
82 |                  "badges",
83 |                  "programmatic_descriptions"]
84 |     }
85 |   },
86 |   "functions": [
87 |     {
88 |       "script_score": {
89 |         "script": "def scores = 0; scores = doc['total_usage'].value; return _score * Math.log10(2 + scores);"
90 | 
91 |       }
92 |     }
93 |   ]
94 | }
95 | ```
96 | 
97 | If you want to boost the search results that have a certain badge:
98 | ```
99 | "function_score": {
100 |   "query": {
101 |     "multi_match": {
102 |       "query": query_term,
103 |       "fields": ["display_name^1000",
104 |                  "name.raw^75",
105 |                  "name^5",
106 |                  "schema^3",
107 |                  "description^3",
108 |                  "column_names^2",
109 |                  "column_descriptions",
110 |                  "tags",
111 |                  "badges",
112 |                  "programmatic_descriptions"]
113 |     }
114 |   },
115 |   "functions": [
116 |     {
117 |       "script_score": {
118 |         "script": "def scores = 0; scores = doc['total_usage'].value; if (doc['badges'].value == "
119 |                   "'$badge_for_boost') {return _score * Math.log10(2 + scores) "
120 |                   "* 1.5} else { return _score * Math.log10(2 + scores); }"
121 | 
122 |       }
123 |     }
124 |   ]
125 | }
126 | ```
127 | 
128 | In this case, tables with the given badge ($badge_for_boost, or replace it with your own badge) will get a boosted search ranking score.
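If you would rather iterate on these queries from a script instead of the Kibana Dev Tools, here is a minimal sketch using the `elasticsearch` Python client. This is only an illustration, not part of Amundsen itself: it assumes a 6.x/7.x client, a cluster reachable on `localhost:9200`, and the `table_search_index` alias and field names shown above; `query_term` is a hypothetical search term. Adjust these for your own deployment.

```python
from elasticsearch import Elasticsearch

# Assumption: Elasticsearch is reachable locally; point this at your own cluster.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

query_term = 'test rides'  # hypothetical search term

# Same idea as the first query above: multi_match across the table fields,
# weighted by log2p of total_usage.
body = {
    'query': {
        'function_score': {
            'query': {
                'multi_match': {
                    'query': query_term,
                    'fields': ['display_name^1000', 'name.raw^75', 'name^5',
                               'schema^3', 'description^3', 'column_names^2',
                               'column_descriptions', 'tags', 'badges',
                               'programmatic_descriptions'],
                }
            },
            'field_value_factor': {'field': 'total_usage', 'modifier': 'log2p'},
        }
    }
}

response = es.search(index='table_search_index', body=body, size=10)
for hit in response['hits']['hits']:
    print(hit['_score'], hit['_source'].get('name'))
```

Comparing the hit order and `_score` values across variations of the query is a quick way to sanity-check a new heuristic before changing the search service itself.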
129 | 
130 | For dashboards, you could try out the following:
131 | ```
132 | GET dashboard_search_index/_search
133 | {
134 |   "query": {
135 |     "function_score": {
136 |       "query": {
137 |         "multi_match": {
138 |           "query": "$search-term",
139 |           "fields": ["name.raw^75",
140 |                      "name^7",
141 |                      "group_name.raw^15",
142 |                      "group_name^7",
143 |                      "description^3",
144 |                      "query_names^3"]
145 |         }
146 |       },
147 |       "field_value_factor": {
148 |         "field": "total_usage",
149 |         "modifier": "log2p"
150 |       }
151 |     }
152 |   }
153 | }
154 | ```
155 | 
156 | We hope this tutorial gives you some ideas of how the search works.
157 | -------------------------------------------------------------------------------- /docs/authentication/oidc.md: --------------------------------------------------------------------------------
1 | # OIDC (Keycloak) Authentication
2 | 
3 | Setting up end-to-end authentication using OIDC is fairly simple and can be done using a Flask wrapper, i.e., [flaskoidc](https://github.com/verdan/flaskoidc).
4 | 
5 | `flaskoidc` leverages Flask's `before_request` functionality to authenticate each request before passing it to
6 | the views. It also accepts headers on each request, if available, in order to validate the bearer token of incoming requests.
7 | 
8 | ## Installation
9 | 
10 | Please refer to the [flaskoidc documentation](https://github.com/verdan/flaskoidc/blob/master/README.md)
11 | for installation and configuration.
12 | 
13 | Note: You need to install and configure `flaskoidc` for each microservice of Amundsen,
14 | i.e., for frontendlibrary, metadatalibrary and searchlibrary, in order to secure each of them.
15 | 
16 | ## Amundsen Configuration
17 | 
18 | Once you have `flaskoidc` installed and configured for each microservice, please set the following environment variables:
19 | 
20 | - amundsenfrontendlibrary:
21 | ```bash
22 | APP_WRAPPER: flaskoidc
23 | APP_WRAPPER_CLASS: FlaskOIDC
24 | ```
25 | 
26 | - amundsenmetadatalibrary:
27 | ```bash
28 | FLASK_APP_MODULE_NAME: flaskoidc
29 | FLASK_APP_CLASS_NAME: FlaskOIDC
30 | ```
31 | 
32 | - amundsensearchlibrary: _(Needs to be implemented)_
33 | ```bash
34 | FLASK_APP_MODULE_NAME: flaskoidc
35 | FLASK_APP_CLASS_NAME: FlaskOIDC
36 | ```
37 | 
38 | By default, `flaskoidc` whitelists the healthcheck URLs so that they are not authenticated. In the case of metadatalibrary and searchlibrary,
39 | we may want to whitelist the healthcheck APIs explicitly using the following environment variable.
40 | 
41 | ```bash
42 | FLASK_OIDC_WHITELISTED_ENDPOINTS: 'api.healthcheck'
43 | ```
44 | 
45 | ## Setting Up Request Headers
46 | 
47 | To communicate securely between the microservices, you need to pass the bearer token from the frontend in each request
48 | to metadatalibrary and searchlibrary. This should be done using the `REQUEST_HEADERS_METHOD` config variable in frontendlibrary.
49 | 
50 | - Define a function to add the bearer token to each request in your config.py:
51 | ```python
52 | def get_access_headers(app):
53 |     """
54 |     Function to retrieve and format the Authorization Headers
55 |     that can be passed to the various microservices that expect them.
56 |     :param app: The current app instance, holding the OIDC authorization information
57 |     :return: A formatted dictionary containing the access token
58 |     as the Authorization header.
59 |     """
60 |     try:
61 |         access_token = app.oidc.get_access_token()
62 |         return {'Authorization': 'Bearer {}'.format(access_token)}
63 |     except Exception:
64 |         return None
65 | ```
66 | 
67 | - Set the method as the request header method in your config.py:
68 | ```python
69 | REQUEST_HEADERS_METHOD = get_access_headers
70 | ```
71 | 
72 | This function will be called with the current `app` instance to add the headers to each request when calling any endpoint of
73 | metadatalibrary and searchlibrary, as done [here](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/api/utils/request_utils.py).
74 | 
75 | ## Setting Up Auth User Method
76 | 
77 | In order to get the current authenticated user (which is used in Amundsen for many operations), we need to set
78 | the `AUTH_USER_METHOD` config variable in frontendlibrary.
79 | This function should return the email address, user id and any other required information.
80 | 
81 | - Define a function to fetch the user information in your config.py:
82 | ```python
83 | def get_auth_user(app):
84 |     """
85 |     Retrieves the user information from the OIDC token, and then builds
86 |     a 'UserInfo' class from the token information dictionary.
87 |     We need to convert it to a class in order to use the information
88 |     in the rest of the Amundsen application.
89 |     :param app: The instance of the current app.
90 |     :return: A UserInfo class
91 |     """
92 |     from flask import g
93 |     user_info = type('UserInfo', (object,), g.oidc_id_token)
94 |     # noinspection PyUnresolvedReferences
95 |     user_info.user_id = user_info.preferred_username
96 |     return user_info
97 | ```
98 | 
99 | - Set the method as the auth user method in your config.py:
100 | ```python
101 | AUTH_USER_METHOD = get_auth_user
102 | ```
103 | 
104 | Once done, you'll have end-to-end authentication in Amundsen without any proxy or code changes.
105 | 
106 | ## Using Okta with Amundsen on K8s
107 | 
108 | Assumptions:
109 | 
110 | - You have access to Okta (you can create a developer account for free!)
111 | - You are using k8s to set up Amundsen. See [amundsen-kube-helm](../../amundsen-kube-helm/README.md)
112 | 
113 | 1. You need to have a stable DNS entry for amundsen-frontend that can be registered in Okta.
114 |    - for example, in AWS you can set up Route 53.
115 |    For the rest of this tutorial, I will assume that your stable URI is "http://amundsen-frontend".
116 | 2. You need to register Amundsen in Okta as an app. More info [here](https://developer.okta.com/blog/2018/07/12/flask-tutorial-simple-user-registration-and-login).
117 |    But here are the Amundsen-specific instructions:
118 |    - At this time, I have only successfully tested the integration after ALL grants were checked.
119 |    - Set the Login redirect URIs to: `http://amundsen-frontend/oidc_callback`
120 |    - No need to set a logout redirect URI
121 |    - Set the Initiate login URI to: `http://amundsen-frontend/`
122 |      (This is where Okta will take users if they click on Amundsen via the Okta landing page.)
123 |    - Copy the Client ID and Client secret as you will need these later.
124 | 3. At present, there is no OIDC build of the frontend, so you will need to build an OIDC-enabled image yourself and upload it to, for example, ECR for use by k8s.
125 | You can then specify which image you want to use as a property override for your helm install like so: 126 | 127 | ```yaml 128 | frontEndServiceImage: 123.dkr.ecr.us-west-2.amazonaws.com/edmunds/amundsen-frontend:oidc-test 129 | ``` 130 | 131 | Please see further down in this doc for more instructions on how to build frontend. 132 | 4. When you start up helm you will need to provide some properties. Here are the properties that need to be overridden for oidc to work: 133 | 134 | ```yaml 135 | oidcEnabled: true 136 | createOidcSecret: true 137 | OIDC_CLIENT_ID: YOUR_CLIENT_ID 138 | OIDC_CLIENT_SECRET: YOUR_SECRET_ID 139 | OIDC_ORG_URL: https://edmunds.okta.com 140 | OIDC_AUTH_SERVER_ID: default 141 | # You also will need a custom oidc frontend build too 142 | frontEndServiceImage: 123.dkr.ecr.us-west-2.amazonaws.com/edmunds/amundsen-frontend:oidc-test 143 | ``` 144 | 145 | ## Building frontend with OIDC 146 | 147 | 1. Please look at [this guide](../developer_guide.md) for instructions on how to build a custom frontend docker image. 148 | 2. The only difference to above is that in your docker file you will want to add the following at the end. This will make sure its ready to go for oidc. 149 | You can take alook at the public.Dockerfile as a reference. 150 | 151 | ```dockerfile 152 | RUN pip3 install .[oidc] 153 | ENV FRONTEND_SVC_CONFIG_MODULE_CLASS amundsen_application.oidc_config.OidcConfig 154 | ENV APP_WRAPPER flaskoidc 155 | ENV APP_WRAPPER_CLASS FlaskOIDC 156 | ENV FLASK_OIDC_WHITELISTED_ENDPOINTS status,healthcheck,health 157 | ENV SQLALCHEMY_DATABASE_URI sqlite:///sessions.db 158 | ``` 159 | 160 | 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "{}" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2018 Lyft, Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /amundsen-kube-helm/README.md: -------------------------------------------------------------------------------- 1 | # Amundsen K8s Helm Charts 2 | 3 | Source code can be found [here](https://github.com/amundsen-io/amundsen) 4 | 5 | ## What is this? 6 | 7 | This is setup templates for deploying [amundsen](https://github.com/amundsen-io/amundsen) on [k8s (kubernetes)](https://kubernetes.io/), using [helm.](https://helm.sh/) 8 | 9 | ## How do I get started? 10 | 11 | 1. 
Make sure you have the following command line clients setup: 12 | - k8s (kubectl) 13 | - helm 14 | 2. Build out a cloud based k8s cluster, such as [Amazon EKS](https://aws.amazon.com/eks/) 15 | 3. Ensure you can connect to your cluster with cli tools in step 1. 16 | 17 | ## Prerequisites 18 | 19 | 1. Helm 2.14+ 20 | 2. Kubernetes 1.14+ 21 | 22 | ## Chart Requirements 23 | 24 | | Repository | Name | Version | 25 | |------------|------|---------| 26 | | https://kubernetes-charts.storage.googleapis.com/ | elasticsearch | 1.32.0 | 27 | 28 | ## Chart Values 29 | 30 | The following table lists the configurable parameters of the Amundsen charts and their default values. 31 | 32 | | Key | Type | Default | Description | 33 | |-----|------|---------|-------------| 34 | | LONG_RANDOM_STRING | int | `1234` | A long random string. You should probably provide your own. This is needed for OIDC. | 35 | | affinity | object | `{}` | amundsen application wide configuration of affinity. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) | 36 | | dnsZone | string | `"teamname.company.com"` | **DEPRECATED - its not standard to pre construct urls this way.** The dns zone (e.g. group-qa.myaccount.company.com) the app is running in. Used to construct dns hostnames (on aws only). | 37 | | dockerhubImagePath | string | `"amundsendev"` | **DEPRECATED - this is not useful, it would be better to just allow the whole image to be swapped instead.** The image path for dockerhub. | 38 | | elasticsearch.client.replicas | int | `1` | only running amundsen on 1 client replica | 39 | | elasticsearch.cluster.env.EXPECTED_MASTER_NODES | int | `1` | required to match master.replicas | 40 | | elasticsearch.cluster.env.MINIMUM_MASTER_NODES | int | `1` | required to match master.replicas | 41 | | elasticsearch.cluster.env.RECOVER_AFTER_MASTER_NODES | int | `1` | required to match master.replicas | 42 | | elasticsearch.data.replicas | int | `1` | only running amundsen on 1 data replica | 43 | | elasticsearch.enabled | bool | `true` | set this to false, if you want to provide your own ES instance. | 44 | | elasticsearch.master.replicas | int | `1` | only running amundsen on 1 master replica | 45 | | environment | string | `"dev"` | **DEPRECATED - its not standard to pre construct urls this way.** The environment the app is running in. Used to construct dns hostnames (on aws only) and ports. | 46 | | frontEnd.OIDC_AUTH_SERVER_ID | string | `nil` | The authorization server id for OIDC. | 47 | | frontEnd.OIDC_CLIENT_ID | string | `nil` | The client id for OIDC. | 48 | | frontEnd.OIDC_CLIENT_SECRET | string | `""` | The client secret for OIDC. | 49 | | frontEnd.OIDC_ORG_URL | string | `nil` | The organization URL for OIDC. | 50 | | frontEnd.affinity | object | `{}` | Frontend pod specific affinity. | 51 | | frontEnd.annotations | object | `{}` | Frontend service specific tolerations. | 52 | | frontEnd.baseUrl | string | `"http://localhost"` | used by notifications util to provide links to amundsen pages in emails. | 53 | | frontEnd.createOidcSecret | bool | `false` | OIDC needs some configuration. If you want the chart to make your secrets, set this to true and set the next four values. 
If you don't want to configure your secrets via helm, you can still use the amundsen-oidc-config.yaml as a template | 54 | | frontEnd.image | string | `"amundsendev/amundsen-frontend"` | The image of the frontend container. | 55 | | frontEnd.imageTag | string | `"2.0.0"` | The image tag of the frontend container. | 56 | | frontEnd.nodeSelector | object | `{}` | Frontend pod specific nodeSelector. | 57 | | frontEnd.oidcEnabled | bool | `false` | To enable auth via OIDC, set this to true. | 58 | | frontEnd.podAnnotations | object | `{}` | Frontend pod specific annotations. | 59 | | frontEnd.replicas | int | `1` | How many replicas of the frontend service to run. | 60 | | frontEnd.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 61 | | frontEnd.serviceName | string | `"frontend"` | The frontend service name. | 62 | | frontEnd.servicePort | int | `80` | The port the frontend service will be exposed on via the loadbalancer. | 63 | | frontEnd.serviceType | string | `"ClusterIP"` | The frontend service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | 64 | | frontEnd.tolerations | list | `[]` | Frontend pod specific tolerations. | 65 | | metadata.affinity | object | `{}` | Metadata pod specific affinity. | 66 | | metadata.annotations | object | `{}` | Metadata service specific tolerations. | 67 | | metadata.image | string | `"amundsendev/amundsen-metadata"` | The image of the metadata container. | 68 | | metadata.imageTag | string | `"2.0.0"` | The image tag of the metadata container. | 69 | | metadata.neo4jEndpoint | string | `nil` | The name of the service hosting neo4j on your cluster, if you bring your own. You should only need to change this, if you don't use the version in this chart. | 70 | | metadata.nodeSelector | object | `{}` | Metadata pod specific nodeSelector. | 71 | | metadata.podAnnotations | object | `{}` | Metadata pod specific annotations. | 72 | | metadata.replicas | int | `1` | How many replicas of the metadata service to run. | 73 | | metadata.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 74 | | metadata.serviceName | string | `"metadata"` | The metadata service name. | 75 | | metadata.serviceType | string | `"ClusterIP"` | The metadata service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | 76 | | metadata.tolerations | list | `[]` | Metadata pod specific tolerations. | 77 | | neo4j.affinity | object | `{}` | neo4j specific affinity. | 78 | | neo4j.annotations | object | `{}` | neo4j service specific tolerations. | 79 | | neo4j.backup | object | `{"enabled":false,"s3Path":"s3://dev/null","schedule":"0 * * * *"}` | If enabled is set to true, make sure and set the s3 path as well. | 80 | | neo4j.backup.s3Path | string | `"s3://dev/null"` | The s3path to write to for backups. | 81 | | neo4j.backup.schedule | string | `"0 * * * *"` | The schedule to run backups on. Defaults to hourly. | 82 | | neo4j.config | object | `{"dbms":{"heap_initial_size":"23000m","heap_max_size":"23000m","pagecache_size":"26600m"}}` | Neo4j application specific configuration. This type of configuration is why the charts/stable version is not used. 
See [ref](https://github.com/helm/charts/issues/21439) | 83 | | neo4j.config.dbms | object | `{"heap_initial_size":"23000m","heap_max_size":"23000m","pagecache_size":"26600m"}` | dbms config for neo4j | 84 | | neo4j.config.dbms.heap_initial_size | string | `"23000m"` | the initial java heap for neo4j | 85 | | neo4j.config.dbms.heap_max_size | string | `"23000m"` | the max java heap for neo4j | 86 | | neo4j.config.dbms.pagecache_size | string | `"26600m"` | the page cache size for neo4j | 87 | | neo4j.enabled | bool | `true` | If neo4j is enabled as part of this chart, or not. Set this to false if you want to provide your own version. | 88 | | neo4j.nodeSelector | object | `{}` | neo4j specific nodeSelector. | 89 | | neo4j.persistence | object | `{}` | Neo4j persistence. Turn this on to keep your data between pod crashes, etc. This is also needed for backups. | 90 | | neo4j.podAnnotations | object | `{}` | neo4j pod specific annotations. | 91 | | neo4j.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 92 | | neo4j.tolerations | list | `[]` | neo4j specific tolerations. | 93 | | neo4j.version | string | `"3.3.0"` | The neo4j application version used by amundsen. | 94 | | nodeSelector | object | `{}` | amundsen application wide configuration of nodeSelector. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) | 95 | | podAnnotations | object | `{}` | amundsen application wide configuration of podAnnotations. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. [ref](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) | 96 | | provider | string | `"aws"` | The cloud provider the app is running in. Used to construct dns hostnames (on aws only). | 97 | | search.affinity | object | `{}` | Search pod specific affinity. | 98 | | search.annotations | object | `{}` | Search service specific tolerations. | 99 | | search.elasticsearchEndpoint | string | `nil` | The name of the service hosting elasticsearch on your cluster, if you bring your own. You should only need to change this, if you don't use the version in this chart. | 100 | | search.image | string | `"amundsendev/amundsen-search"` | The image of the search container. | 101 | | search.imageTag | string | `"2.0.0"` | The image tag of the search container. | 102 | | search.nodeSelector | object | `{}` | Search pod specific nodeSelector. | 103 | | search.podAnnotations | object | `{}` | Search pod specific annotations. | 104 | | search.replicas | int | `1` | How many replicas of the search service to run. | 105 | | search.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 106 | | search.serviceName | string | `"search"` | The search service name. | 107 | | search.serviceType | string | `"ClusterIP"` | The search service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | 108 | | search.tolerations | list | `[]` | Search pod specific tolerations. | 109 | | tolerations | list | `[]` | amundsen application wide configuration of tolerations. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. 
[ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#taints-and-tolerations-beta-feature) | 110 | 111 | ## Neo4j DBMS Config? 112 | 113 | You may want to override the default memory usage for Neo4J. In particular, if you're just test-driving a deployment and your node exits with status 137, you should set the usage to smaller values: 114 | 115 | ``` yaml 116 | config: 117 | dbms: 118 | heap_initial_size: 1G 119 | heap_max_size: 2G 120 | pagecache_size: 2G 121 | ``` 122 | 123 | With this values file, you can then install Amundsen using Helm 2 with: 124 | 125 | ``` shell 126 | helm install ./templates/helm --values impl/helm/dev/values.yaml 127 | ``` 128 | 129 | For Helm 3 it's now mandatory to specify a [chart reference name](https://helm.sh/docs/intro/using_helm/#helm-install-installing-a-package) e.g. `my-amundsen`: 130 | 131 | ``` shell 132 | helm install my-amundsen ./templates/helm --values impl/helm/dev/values.yaml 133 | ``` 134 | 135 | ## Other Notes 136 | 137 | - For aws setup, you will also need to setup the [external-dns plugin](https://github.com/kubernetes-incubator/external-dns) 138 | - There is an existing helm chart for neo4j, but, it is missing some features necessary to for use such as: 139 | - [\[stable/neo4j\] make neo4j service definition more extensible](https://github.com/helm/charts/issues/21441); without this, it is not possible to setup external load balancers, external-dns, etc 140 | - [\[stable/neo4j\] allow custom configuration of neo4j](https://github.com/helm/charts/issues/21439); without this, custom configuration is not possible which includes setting configmap based settings, which also includes turning on apoc. 141 | -------------------------------------------------------------------------------- /docs/developer_guide.md: -------------------------------------------------------------------------------- 1 | # Developer Guide 2 | 3 | This repository uses `git submodules` to link the code for all of Amundsen's libraries into a central location. This document offers guidance on how to develop locally with this setup. 4 | 5 | This workflow leverages `docker` and `docker-compose` in a very similar manner to our [installation documentation](https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md#bootstrap-a-default-version-of-amundsen-using-docker), to spin up instances of all 3 of Amundsen's services connected with an instances of Neo4j and ElasticSearch which ingest dummy data. 6 | 7 | ## Cloning the Repository 8 | 9 | If cloning the repository for the first time, run the following command to clone the repository and pull the submodules: 10 | 11 | ```bash 12 | $ git clone --recursive git@github.com:amundsen-io/amundsen.git 13 | ``` 14 | 15 | If you have already cloned the repository but your submodules are empty, from your cloned `amundsen` directory run: 16 | 17 | ```bash 18 | $ git submodule init 19 | $ git submodule update 20 | ``` 21 | 22 | After cloning the repository you can change directories into any of the upstream folders and work in those directories as you normally would. You will have full access to all of the git features, and working in the upstream directories will function the same as if you were working in a cloned version of that repository. 
23 | 
24 | ## Local Development
25 | 
26 | ### Ensure you have the latest code
27 | 
28 | Beyond running `git pull origin master` in your local `amundsen` directory, the submodules for our libraries also have to be manually updated to point to the latest version of each library's code. When creating a new branch on `amundsen` to begin local work, ensure your local submodules are pointing to the latest code for each library by running:
29 | 
30 | ```bash
31 | $ git submodule update --remote
32 | ```
33 | 
34 | ### Building local changes
35 | 
36 | 1. First, be sure that you have followed the [installation documentation](https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md#bootstrap-a-default-version-of-amundsen-using-docker) and can spin up a default version of Amundsen without any issues. If you have already completed this step, be sure to have stopped and removed those containers by running:
37 | ```bash
38 | $ docker-compose -f docker-amundsen.yml down
39 | ```
40 | 
41 | 2. Launch the containers needed for local development (the `-d` option launches them in the background):
42 | ```bash
43 | $ docker-compose -f docker-amundsen-local.yml up -d
44 | ```
45 | 
46 | 3. After making local changes, rebuild and relaunch the modified containers:
47 | ```bash
48 | $ docker-compose -f docker-amundsen-local.yml build \
49 |   && docker-compose -f docker-amundsen-local.yml up -d
50 | ```
51 | 
52 | 4. Optionally, to tail the logs, in a different terminal you can run:
53 | ```bash
54 | $ docker-compose -f docker-amundsen-local.yml logs --tail=3 -f
55 | ## - or just tail single container(s):
56 | $ docker logs amundsenmetadata --tail 10 -f
57 | ```
58 | 
59 | ### Local data
60 | 
61 | Local data is persisted under .local/ (at the root of the project); clean up the following directories to reset the databases:
62 | 
63 | ```bash
64 | # reset elasticsearch
65 | rm -rf .local/elasticsearch
66 | 
67 | # reset neo4j
68 | rm -rf .local/neo4j
69 | ```
70 | 
71 | 
72 | ### Troubleshooting
73 | 
74 | 1. If you have made a change in `amundsen/amundsenfrontendlibrary` and do not see your changes, this could be due to your browser's caching behaviors. Either execute a hard refresh (recommended) or clear your browser cache (last resort).
75 | 
76 | ### Testing Amundsen frontend locally
77 | 
78 | Amundsen has instructions for launching the frontend locally [here](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/installation.md)
79 | 
80 | Here are some additional changes you might need for Windows (Windows 10):
81 | 
82 | - amundsen_application/config.py, set LOCAL_HOST = '127.0.0.1'
83 | - amundsen_application/wsgi.py, set host='127.0.0.1'
84 |   (for other microservices you also need to change the `port` here, because the default is 5000)
85 | 
86 | (Using that approach you can also run other microservices locally if needed.)
87 | 
88 | Once you have a running frontend microservice, the rest of the Amundsen components can be launched with docker-compose
89 | from the root Amundsen project (don't forget to remove the frontend microservice section from docker-amundsen.yml):
90 | `docker-compose -f docker-amundsen.yml up`
91 | https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md
92 | 
93 | ### Developing the Docker build file
94 | 
95 | When making edits to the Docker build file (docker-amundsen-local.yml), it is useful to see what you are getting wrong locally.
96 | To do that, build it with `docker build .`
97 | 
98 | The output should then include a line like the following at the step right before it failed:
99 | 
100 | ```bash
101 | Step 3/20 : RUN git clone --recursive git://github.com/amundsen-io/amundsenfrontendlibrary.git && cd amundsenfrontendlibrary && git submodule foreach git pull origin master
102 | ---> Using cache
103 | ---> ec052612747e
104 | ```
105 | 
106 | You can then launch a container from this image like so:
107 | 
108 | ```bash
109 | docker container run -it --name=debug ec052612747e /bin/sh
110 | ```
111 | 
112 | ### Building and Testing Amundsen Frontend Docker Image (or any other service)
113 | 
114 | 1. Build your image with
115 |    `docker build --no-cache .` It is recommended that you use --no-cache so you aren't accidentally using an old version of an image.
116 | 2. Determine the hash of your image by running `docker images` and getting the id of your most recent image.
117 | 3. Go to your locally cloned amundsen repo and edit the docker compose file "docker-amundsen.yml" to have
118 |    the amundsenfrontend image point to the hash of the image that you built.
119 | 
120 | ```yaml
121 | amundsenfrontend:
122 |   #image: amundsendev/amundsen-frontend:1.0.9
123 |   #image: 1234.dkr.ecr.us-west-2.amazonaws.com/edmunds/amundsen-frontend:2020-01-21
124 |   image: 0312d0ac3938
125 | ```
126 | 
127 | ### Pushing image to ECR and using in K8s
128 | 
129 | Assumptions:
130 | 
131 | - You have an AWS account
132 | - You have the AWS command line set up and ready to go
133 | 
134 | 1. Choose an ECR repository you'd like to push to (or create a new one):
135 |    https://us-west-2.console.aws.amazon.com/ecr/repositories
136 | 2. Click on the repository name and open the "View push commands" cheat sheet.
137 |    2b. Log in
138 | 
139 |    It would look something like this:
140 | 
141 |    `aws ecr get-login --no-include-email --region us-west-2`
142 |    Then execute what is returned by the above.
143 | 
144 | 3. Follow the instructions (you may first need to install the AWS CLI and aws-okta, and configure your AWS credentials if you haven't done so before).
145 |    Given the image name amundsen-frontend, the build, tag and push commands will be the following.
146 |    Here the tag is YYYY-MM-dd, but you should choose whatever you like.
147 | ```
148 | docker build -t amundsen-frontend:{YYYY-MM-dd} .
149 | docker tag amundsen-frontend:{YYYY-MM-dd} <aws-account-id>.dkr.ecr.<region>.amazonaws.com/amundsen-frontend:{YYYY-MM-dd}
150 | docker push <aws-account-id>.dkr.ecr.<region>.amazonaws.com/amundsen-frontend:{YYYY-MM-dd}
151 | ```
152 | 
153 | 4. Go to `helm/{env}/amundsen/values.yaml` and modify it to the image tag that you want to use.
154 | 
155 | 5. When updating amundsen-frontend, make sure to do a hard refresh of Amundsen and empty the cache,
156 |    otherwise you will see a stale version of the webpage.
157 | 
158 | ### Test the search service locally using staging or production data
159 | 
160 | To test locally, we need to stand up Elasticsearch, publish the index data, and stand up the Search service.
161 | 
162 | #### Standup Elasticsearch
163 | 
164 | Run Elasticsearch via Docker. To install Docker, go [here](https://hub.docker.com/editions/community/docker-ce-desktop-mac).
165 | Example:
166 | 
167 |     docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:6.2.4
168 | 
169 | ##### (Optional) Standup Kibana
170 | 
171 |     docker run --link ecstatic_edison:elasticsearch -p 5601:5601 docker.elastic.co/kibana/kibana:6.2.4
172 | 
173 | *Note that `ecstatic_edison` is the container name of the Elasticsearch container.
Update it if it's different by looking at `docker ps` 174 | 175 | #### Publish Table index through Databuilder 176 | 177 | ##### Install Databuilder 178 | 179 | cd ~/src/ 180 | git clone git@github.com:amundsen-io/amundsendatabuilder.git 181 | cd ~/src/amundsendatabuilder 182 | virtualenv venv 183 | source venv/bin/activate 184 | python setup.py install 185 | pip install -r requirements.txt 186 | 187 | ##### Publish Table index 188 | 189 | First fill this two environment variables: `NEO4J_ENDPOINT` , `CREDENTIALS_NEO4J_PASSWORD` 190 | 191 | $ python 192 | 193 | import logging 194 | import os 195 | import uuid 196 | 197 | from elasticsearch import Elasticsearch 198 | from pyhocon import ConfigFactory 199 | 200 | from databuilder.extractor.neo4j_extractor import Neo4jExtractor 201 | from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor 202 | from databuilder.job.job import DefaultJob 203 | from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader 204 | from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher 205 | from databuilder.task.task import DefaultTask 206 | 207 | logging.basicConfig(level=logging.INFO) 208 | 209 | neo4j_user = 'neo4j' 210 | neo4j_password = os.getenv('CREDENTIALS_NEO4J_PASSWORD') 211 | neo4j_endpoint = os.getenv('NEO4J_ENDPOINT') 212 | 213 | elasticsearch_client = Elasticsearch([ 214 | {'host': 'localhost'}, 215 | ]) 216 | 217 | data_file_path = '/var/tmp/amundsen/elasticsearch_upload/es_data.json' 218 | 219 | elasticsearch_new_index = 'table_search_index_{hex_str}'.format(hex_str=uuid.uuid4().hex) 220 | logging.info("Elasticsearch new index: " + elasticsearch_new_index) 221 | 222 | elasticsearch_doc_type = 'table' 223 | elasticsearch_index_alias = 'table_search_index' 224 | 225 | job_config = ConfigFactory.from_dict({ 226 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): 227 | neo4j_endpoint, 228 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.MODEL_CLASS_CONFIG_KEY): 229 | 'databuilder.models.table_elasticsearch_document.TableESDocument', 230 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_USER): 231 | neo4j_user, 232 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_PW): 233 | neo4j_password, 234 | 'loader.filesystem.elasticsearch.{}'.format(FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY): 235 | data_file_path, 236 | 'loader.filesystem.elasticsearch.{}'.format(FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY): 237 | 'w', 238 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.FILE_PATH_CONFIG_KEY): 239 | data_file_path, 240 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.FILE_MODE_CONFIG_KEY): 241 | 'r', 242 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY): 243 | elasticsearch_client, 244 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY): 245 | elasticsearch_new_index, 246 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_DOC_TYPE_CONFIG_KEY): 247 | elasticsearch_doc_type, 248 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY): 249 | elasticsearch_index_alias, 250 | }) 251 | 252 | job = DefaultJob(conf=job_config, 253 | task=DefaultTask(extractor=Neo4jSearchDataExtractor(), 254 | loader=FSElasticsearchJSONLoader()), 255 | publisher=ElasticsearchPublisher()) 256 | if neo4j_password: 
257 | job.launch() 258 | else: 259 | raise ValueError('Add environment variable CREDENTIALS_NEO4J_PASSWORD') 260 | 261 | #### Standup Search service 262 | 263 | Follow this [instruction](https://github.com/amundsen-io/amundsensearchlibrary#instructions-to-start-the-search-service-from-source) 264 | 265 | Test the search API with this command: 266 | 267 | curl -vv "http://localhost:5001/search?query_term=test&page_index=0" 268 | -------------------------------------------------------------------------------- /GOVERNANCE.md: -------------------------------------------------------------------------------- 1 | # Governance 2 | 3 | At Amundsen, we want to produce an environment of fairness that people can rely on. A formal governance structure helps us resolve debates, invite in (or out) new developers and plan new features. 4 | 5 | With the following governance system, we want to facilitate project permanence, supporting it with healthy habits and processes that are well understood by everyone. 6 | 7 | ## Amundsen Governance Model 8 | 9 | Amundsen is a meritocratic, consensus-based community project. Anyone interested in the project can join the community, contribute to the project design, and participate in the decision-making process. This document describes how that participation occurs and how to set about earning merit within the project community. 10 | 11 | ## Roles And Responsibilities 12 | 13 | ### Users 14 | 15 | Users are community members who need the data discovery features of Amundsen. They are the most important community members, and without them, the project would have no purpose. Anyone can be a user; there are no special requirements. 16 | 17 | Amundsen asks its users to participate in the project and community as much as possible. User contributions enable the project team to ensure that they satisfy the needs of those users. Frequent user contributions include (but are not limited to): 18 | 19 | - Evangelizing about the project (e.g., a link on a website and word-of-mouth awareness raising) 20 | - Informing developers of strengths and weaknesses from a new user perspective 21 | - Providing moral support (a ‘thank you’ goes a long way) 22 | - Providing financial support (the software is open source, but its developers need to eat) 23 | 24 | Users who continue to engage with the project and its community will often become more and more involved. Such users may find themselves becoming contributors, as described in the next section. 25 | 26 | ### Contributors 27 | 28 | Contributors are community members who contribute in concrete ways to the project. Anyone can become a contributor, and contributions can take many forms, as detailed in the [all-contributors project](https://allcontributors.org/docs/en/emoji-key#table). There is no expectation of commitment to the project, no specific skill requirements, and no selection process. 29 | 30 | In addition to their actions as users, contributors may also find themselves doing one or more of the following: 31 | 32 | - Supporting new users (existing users are often the best people to help new users) 33 | - Creating, triaging or commenting on Issues 34 | - Doing code reviews or commenting on technical documents 35 | - Writing, editing, translating or reviewing the documentation 36 | - Organizing events or evangelizing the project 37 | 38 | Contributors engage with the project through the issue tracker and slack community, or by writing or editing documentation. 
They submit changes to the project itself via Pull Requests (PRs), which will be considered for inclusion in the project by existing maintainers (see next section). Contributors follow the [Contributing guide](https://www.amundsen.io/amundsen/CONTRIBUTING/) when creating PRs.
39 | 
40 | As contributors gain experience and familiarity with the project, their profile and commitment within the community will increase. At some stage, they may find themselves being nominated to become a maintainer.
41 | 
42 | ### Maintainers
43 | 
44 | Maintainers are community members who have shown that they are committed to Amundsen's continued development through ongoing engagement with the community. Because of this, maintainers have the right to merge PRs and triage issues.
45 | 
46 | Note that any change to resources in Amundsen must be through pull requests. This applies to all changes to documentation, code, configuration, etc. Even maintainers must use pull requests, as they are key to providing transparency and attracting new contributors to the project. Additionally, no pull request can be merged without being reviewed.
47 | 
48 | Anyone can become a maintainer. Typically, a potential maintainer will need to show that they understand the project, its objectives, and its strategy. They will also have provided valuable contributions to the project over a period of time. Read the sections below to know how to become an Amundsen maintainer.
49 | 
50 | ## Becoming a Maintainer
51 | 
52 | Any existing maintainer can nominate new maintainers. Once they have been nominated, there will be a vote by the rest of the maintainers. Maintainer voting is one of the few activities that take place on a private channel. This is to allow maintainers to freely express their opinions about a nominee without causing embarrassment. The approval requires **three +1 votes from maintainers** and **no -1 votes**.
53 | 
54 | Once the vote has been held, the aggregated voting results are published on the #amundsen channel. The nominee is entitled to request an explanation of any ‘no’ votes against them, regardless of the vote's outcome. This explanation will be provided by the maintainers and will be anonymous and constructive.
55 | 
56 | Nominees may decline their appointment as a maintainer. Becoming a maintainer means that they will be spending a substantial amount of time working on Amundsen for the foreseeable future. It is essential to recognize that being a maintainer is a privilege, not a right. That privilege must be earned, and once earned, the rest of the maintainers can remove it in extreme circumstances.
57 | 
58 | ### Earning a Nomination
59 | 
60 | There is no single path to earning a nomination for maintainer at Amundsen; however, we can give some guidance on actions that would help:
61 | 
62 | - Start by telling the maintainers that you are interested in becoming a maintainer.
63 | - You can start tackling issues labeled as ['help wanted'](https://github.com/amundsen-io/amundsen/labels/help%20wanted), or if you are new to the project, some of the ['good first issue'](https://github.com/amundsen-io/amundsen/labels/good%20first%20issue) tickets.
64 | - As you gain experience with the codebase and our standards, we will ask you to do code reviews for incoming PRs (i.e., all maintainers are expected to shoulder a proportional share of community reviews).
65 | - We will expect you to start contributing increasingly complicated PRs, under the guidance of the existing maintainers.
66 | - After approximately 2-3 months of working together, an existing maintainer will be able to nominate you for maintainer status. 67 | 68 | We make no guarantees about the length of time this will take, but 2-3 months is the approximate goal. 69 | 70 | ### Maintainer Responsibilities 71 | 72 | The project maintainers are those individuals identified as ‘project owners’ on the development site. Maintainers have many responsibilities that ensure the smooth running of the project. Among them: 73 | 74 | - Monitor email aliases and our Slack (delayed response is perfectly acceptable). 75 | - Perform code reviews for other maintainers and the community. The areas of specialization listed in [OWNERS.md](OWNERS.md) can be used to help with routing an issue/question to the right person. 76 | - Triage GitHub issues, applying [labels](https://github.com/amundsen-io/amundsen/labels) to each new item. Labels are extremely useful for future issue follow-ups. Adding labels is somewhat subjective, so please use your best judgment. Read more about our labels in [this document](https://www.amundsen.io/amundsen/issue_labeling/). 77 | - Triage build issues: file issues for known flaky builds or bugs, and fix (or find someone to fix) any master build breakages. 78 | - Make sure that ongoing PRs are moving forward at the right pace, or close them. 79 | - Continue to spend at least 25% of your time working on Amundsen (~1.25 business days per week). 80 | - Participate in strategic planning, approve changes to the governance model, and manage the copyrights within the project outputs. 81 | 82 | ## Losing Maintainer Status 83 | 84 | If a maintainer is no longer interested in the project or cannot perform the duties listed above, they can volunteer to be moved to emeritus status; otherwise, maintainer status is held for life. An emeritus maintainer may request reinstatement of commit access from the rest of the maintainers. Such reinstatement is subject to lazy consensus approval by the active maintainers. 85 | 86 | In extreme cases, maintainers can lose their status by a vote of the maintainers per the voting process below. 87 | 88 | ## Decision Making Process 89 | 90 | Decisions about the future of Amundsen are made through discussion with all community members, from the newest user to the most experienced maintainer. All non-sensitive project management discussion takes place in the project's issue tracker. Occasionally, sensitive discussion occurs on a private channel of our Slack. 91 | 92 | To ensure that the project is not bogged down by endless discussion and continual voting, the project operates a policy of lazy consensus. This allows the majority of decisions to be made without resorting to a formal vote. 93 | 94 | ### Lazy consensus 95 | 96 | Decision making typically involves the following steps: 97 | 98 | - Proposal 99 | - Discussion 100 | - Vote (if consensus is not reached through discussion) 101 | - Decision 102 | 103 | Any community member can make a proposal for consideration by the community. To initiate a discussion about a new idea, they should create an issue in the issue tracker or submit a PR implementing the idea. This will prompt a review and, if necessary, a discussion of the idea. The goal of this review and discussion is to gain approval for the contribution. Since most people in the project community have a shared vision, there is often little discussion to reach consensus.
104 | 105 | In general, as long as nobody explicitly opposes a proposal or PR, it is recognized as having the support of the community. This is called lazy consensus - that is, those who have not stated their opinion explicitly have implicitly agreed to the proposal's implementation. 106 | 107 | Lazy consensus is a fundamental concept within the project. This process allows a large group of people to reach consensus efficiently, since someone with no objections to a proposal need not spend time stating their position. 108 | 109 | For lazy consensus to be effective, it is necessary to allow at least 48 hours before assuming that there are no objections to the proposal. This requirement ensures that everyone is given enough time to read, digest, and respond to the proposal. This time period is chosen to be as inclusive as possible of all participants, regardless of their location and time commitments. 110 | 111 | ### Voting 112 | 113 | Not all decisions can be made using lazy consensus. Issues such as those affecting the strategic direction or legal standing of the project must gain explicit approval in the form of a vote. Every member of the community is encouraged to express their opinions in all discussions and all votes. However, only project maintainers have binding votes for the purposes of decision making. 114 | 115 | ## Roadmap Creation 116 | 117 | Our [roadmap](https://www.amundsen.io/amundsen/roadmap/) gives an overview of what we are currently working on and what we want to tackle next. This helps potential contributors understand the project's current status and where it is going next, and gives them a chance to be part of the planning. 118 | 119 | In this section, we describe the process we follow to create it, using request-for-comments (RFC) documents. 120 | 121 | ### RFCs Process 122 | 123 | Most of the issues we see can be handled with regular GitHub issues. However, some changes are "substantial", and we ask that these go through a design process and reach consensus among the Amundsen community. 124 | 125 | The "RFC" (request for comments) process is intended to provide a consistent and controlled path for new features to enter the roadmap. The high-level process looks like this: 126 | 127 | 1. A contributor creates an RFC draft in the repository 128 | 2. Users, Contributors, and Maintainers discuss and upvote the draft 129 | 3. If confident in its success, the contributor completes the RFC with more detailed technical specifications 130 | 4. Maintainers approve the RFC when it is ready 131 | 5. Maintainers meet every quarter and choose three to five items based on popularity and alignment with the project's vision and goals 132 | 6.
Those selected items become part of the mid-term goals 133 | 134 | #### When to Use RFCs 135 | 136 | What constitutes a "substantial" change is evolving based on the community, but may include the following: 137 | 138 | - New features that require configuration options to activate/deactivate 139 | - Removing features 140 | - Architecture changes 141 | - Examples: 142 | - Adding lineage features 143 | - Dashboards integration 144 | 145 | Some changes do not require an RFC: 146 | 147 | - Reorganizing or refactoring code or documentation 148 | - Improvements that tackle objective quality criteria (speedup, better browser support) 149 | - Changes noticeable only by contributors or maintainers 150 | - Examples: 151 | - Adding programmatic descriptions 152 | - Adding support for tags at a column level 153 | 154 | If you submit a pull request to implement a new feature without going through the RFC process, it may be closed with a polite request to submit an RFC first. That said, if most of the work is already done, we may accelerate the process. 155 | 156 | We will keep our RFC documents in a separate repository in the amundsen-io organization, where a detailed step-by-step process will be documented. 157 | 158 | ## References 159 | 160 | - [Envoy’s Governance Document](https://github.com/envoyproxy/envoy/blob/master/GOVERNANCE.md) 161 | - [OSS Watch, Meritocratic Governance](http://oss-watch.ac.uk/resources/meritocraticgovernancemodel) 162 | - [The Apache Software Foundation meritocratic model](http://www.apache.org/foundation/how-it-works.html#meritocracy) 163 | - [Ember RFCs](https://github.com/emberjs/rfcs) 164 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/values.yaml: -------------------------------------------------------------------------------- 1 | # Duplicate this file and put your customization here 2 |
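# An illustrative sketch (not part of the chart defaults): rather than editing
# this file in place, you would typically copy it, or create a smaller override
# file containing only the values you want to change, and pass that file to
# helm with the -f flag when installing or upgrading the chart. A hypothetical
# my-values.yaml overriding a couple of the settings defined below might look
# like:
#
#   frontEnd:
#     baseUrl: https://amundsen.mycompany.com
#   neo4j:
#     persistence:
#       storageClass: gp2
#       size: 10Gi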
3 | ## 4 | ## common settings for all apps 5 | ## 6 | 7 | ## NOTE - README table was generated with https://github.com/norwoodj/helm-docs 8 | 9 | ## 10 | ## environment -- **DEPRECATED - it's not standard to pre-construct urls this way.** The environment the app is running in. Used to construct dns hostnames (on aws only) and ports. 11 | ## 12 | environment: "dev" 13 | ## 14 | ## DEPRECATED - it's not standard to pre-construct urls this way 15 | ## provider -- The cloud provider the app is running in. Used to construct dns hostnames (on aws only). 16 | ## 17 | provider: aws 18 | ## 19 | ## dnsZone -- **DEPRECATED - it's not standard to pre-construct urls this way.** The dns zone (e.g. group-qa.myaccount.company.com) the app is running in. Used to construct dns hostnames (on aws only). 20 | ## 21 | dnsZone: teamname.company.com 22 | ## 23 | ## dockerhubImagePath -- **DEPRECATED - this is not useful, it would be better to just allow the whole image to be swapped instead.** The image path for dockerhub. 24 | ## 25 | dockerhubImagePath: amundsendev 26 | ## 27 | ## LONG_RANDOM_STRING -- A long random string. You should probably provide your own. This is needed for OIDC. 28 | ## 29 | LONG_RANDOM_STRING: 1234 30 | 31 | ## 32 | ## nodeSelector -- amundsen application-wide configuration of nodeSelector. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) 33 | ## 34 | nodeSelector: {} 35 | ## 36 | ## affinity -- amundsen application-wide configuration of affinity. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) 37 | ## 38 | affinity: {} 39 | ## 40 | ## tolerations -- amundsen application-wide configuration of tolerations. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#taints-and-tolerations-beta-feature) 41 | ## 42 | tolerations: [] 43 | ## 44 | ## podAnnotations -- amundsen application-wide configuration of podAnnotations. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) 45 | ## 46 | podAnnotations: {} 47 | 48 | ## 49 | ## Configuration related to the search service. 50 | ## 51 | search: 52 | ## 53 | ## search.serviceName -- The search service name. 54 | ## 55 | serviceName: search 56 | ## 57 | ## search.serviceType -- The search service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) 58 | ## 59 | serviceType: ClusterIP 60 | ## 61 | ## search.elasticsearchEndpoint -- The name of the service hosting elasticsearch on your cluster, if you bring your own. You should only need to change this if you don't use the version in this chart. 62 | ## 63 | elasticsearchEndpoint: 64 | ## 65 | ## search.image -- The image of the search container. 66 | ## 67 | image: amundsendev/amundsen-search 68 | ## 69 | ## search.imageTag -- The image tag of the search container. 70 | ## 71 | imageTag: 2.4.0 72 | ## 73 | ## search.replicas -- How many replicas of the search service to run. 74 | ## 75 | replicas: 1 76 | ## 77 | ## search.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 78 | ## 79 | resources: {} 80 | # limits: 81 | # cpu: 2 82 | # memory: 2Gi 83 | # requests: 84 | # cpu: 1 85 | # memory: 1Gi 86 | 87 | ## 88 | ## search.nodeSelector -- Search pod specific nodeSelector. 89 | ## 90 | nodeSelector: {} 91 | ## 92 | ## search.affinity -- Search pod specific affinity. 93 | ## 94 | affinity: {} 95 | ## 96 | ## search.tolerations -- Search pod specific tolerations. 97 | ## 98 | tolerations: [] 99 | ## 100 | ## search.annotations -- Search service specific annotations. 101 | ## 102 | annotations: {} 103 | ## 104 | ## search.podAnnotations -- Search pod specific annotations. 105 | ## 106 | podAnnotations: {} 107 | 108 | ## 109 | ## Configuration related to the metadata service. 110 | ## 111 | metadata: 112 | ## 113 | ## metadata.serviceName -- The metadata service name. 114 | ## 115 | serviceName: metadata 116 | ## 117 | ## metadata.serviceType -- The metadata service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) 118 | ## 119 | serviceType: ClusterIP 120 | ## 121 | ## metadata.neo4jEndpoint -- The name of the service hosting neo4j on your cluster, if you bring your own. You should only need to change this if you don't use the version in this chart. 122 | ## 123 | neo4jEndpoint: 124 | ## 125 | ## metadata.image -- The image of the metadata container. 126 | ## 127 | image: amundsendev/amundsen-metadata 128 | ## 129 | ## metadata.imageTag -- The image tag of the metadata container.
130 | ## 131 | imageTag: 2.5.5 132 | ## 133 | ## metadata.replicas -- How many replicas of the metadata service to run. 134 | ## 135 | replicas: 1 136 | ## 137 | ## metadata.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 138 | ## 139 | resources: {} 140 | # limits: 141 | # cpu: 2 142 | # memory: 2Gi 143 | # requests: 144 | # cpu: 1 145 | # memory: 1Gi 146 | 147 | ## 148 | ## metadata.nodeSelector -- Metadata pod specific nodeSelector. 149 | ## 150 | nodeSelector: {} 151 | ## 152 | ## metadata.affinity -- Metadata pod specific affinity. 153 | ## 154 | affinity: {} 155 | ## 156 | ## metadata.tolerations -- Metadata pod specific tolerations. 157 | ## 158 | tolerations: [] 159 | ## 160 | ## metadata.annotations -- Metadata service specific tolerations. 161 | ## 162 | annotations: {} 163 | ## 164 | ## metadata.podAnnotations -- Metadata pod specific annotations. 165 | ## 166 | podAnnotations: {} 167 | 168 | ## 169 | ## Configuration related to the frontEnd service. 170 | ## 171 | frontEnd: 172 | ## 173 | ## frontEnd.serviceName -- The frontend service name. 174 | ## 175 | serviceName: frontend 176 | ## 177 | ## frontEnd.serviceType -- The frontend service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) 178 | ## 179 | serviceType: ClusterIP 180 | ## 181 | ## frontEnd.image -- The image of the frontend container. 182 | ## 183 | image: amundsendev/amundsen-frontend 184 | ## 185 | ## frontEnd.imageTag -- The image tag of the frontend container. 186 | ## 187 | imageTag: 2.3.0 188 | ## 189 | ## frontEnd.servicePort -- The port the frontend service will be exposed on via the loadbalancer. 190 | ## 191 | servicePort: 80 192 | ## 193 | ## frontEnd.replicas -- How many replicas of the frontend service to run. 194 | ## 195 | replicas: 1 196 | ## 197 | ## frontEnd.baseUrl -- used by notifications util to provide links to amundsen pages in emails. 198 | ## 199 | baseUrl: http://localhost 200 | ## 201 | ## frontEnd.oidcEnabled -- To enable auth via OIDC, set this to true. 202 | ## 203 | oidcEnabled: false 204 | ## 205 | ## frontEnd.createOidcSecret -- OIDC needs some configuration. If you want the chart to make your secrets, set this to true and set the next four values. If you don't want to configure your secrets via helm, you can still use the amundsen-oidc-config.yaml as a template 206 | ## 207 | createOidcSecret: false 208 | 209 | ## 210 | ## frontEnd.OIDC_CLIENT_ID -- The client id for OIDC. 211 | ## 212 | OIDC_CLIENT_ID: 213 | ## 214 | ## frontEnd.OIDC_CLIENT_SECRET -- The client secret for OIDC. 215 | ## 216 | OIDC_CLIENT_SECRET: "" 217 | ## 218 | ## frontEnd.OIDC_ORG_URL -- The organization URL for OIDC. 219 | ## 220 | OIDC_ORG_URL: 221 | ## 222 | ## frontEnd.OIDC_AUTH_SERVER_ID -- The authorization server id for OIDC. 223 | ## 224 | OIDC_AUTH_SERVER_ID: 225 | ## 226 | ## frontEnd.OVERWRITE_REDIRECT_URI -- The redirect uri for OIDC. 227 | ## 228 | OVERWRITE_REDIRECT_URI: 229 | 230 | ## 231 | ## frontEnd.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 232 | ## 233 | resources: {} 234 | # limits: 235 | # cpu: 2 236 | # memory: 2Gi 237 | # requests: 238 | # cpu: 1 239 | # memory: 1Gi 240 | 241 | ## 242 | ## frontEnd.nodeSelector -- Frontend pod specific nodeSelector. 243 | ## 244 | nodeSelector: {} 245 | ## 246 | ## frontEnd.affinity -- Frontend pod specific affinity. 
247 | ## 248 | affinity: {} 249 | ## 250 | ## frontEnd.tolerations -- Frontend pod specific tolerations. 251 | ## 252 | tolerations: [] 253 | ## 254 | ## frontEnd.annotations -- Frontend service specific tolerations. 255 | ## 256 | annotations: {} 257 | ## 258 | ## frontEnd.podAnnotations -- Frontend pod specific annotations. 259 | ## 260 | podAnnotations: {} 261 | 262 | ## 263 | ## Configuration related to neo4j. 264 | ## 265 | neo4j: 266 | ## 267 | ## neo4j.enabled -- If neo4j is enabled as part of this chart, or not. Set this to false if you want to provide your own version. 268 | ## 269 | enabled: true 270 | ## 271 | ## neo4j.version -- The neo4j application version used by amundsen. 272 | ## 273 | version: 3.3.0 274 | 275 | ## 276 | ## neo4j.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 277 | ## 278 | resources: {} 279 | #resources: 280 | # limits: 281 | # cpu: 2 282 | # memory: 2Gi 283 | # requests: 284 | # cpu: 1 285 | # memory: 1Gi 286 | 287 | ## 288 | ## neo4j.config -- Neo4j application specific configuration. This type of configuration is why the charts/stable version is not used. See [ref](https://github.com/helm/charts/issues/21439) 289 | ## 290 | config: 291 | ## 292 | ## neo4j.config.dbms -- dbms config for neo4j 293 | ## 294 | dbms: 295 | ## neo4j.config.dbms.heap_initial_size -- the initial java heap for neo4j 296 | heap_initial_size: 1G 297 | ## neo4j.config.dbms.heap_max_size -- the max java heap for neo4j 298 | heap_max_size: 2G 299 | ## neo4j.config.dbms.pagecache_size -- the page cache size for neo4j 300 | pagecache_size: 2G 301 | 302 | ## 303 | ## neo4j.persistence -- Neo4j persistence. Turn this on to keep your data between pod crashes, etc. This is also needed for backups. 304 | ## 305 | persistence: {} 306 | # storageClass: gp2 307 | # size: 10Gi 308 | # accessMode: ReadWriteMany 309 | # efs: 310 | # dns: 311 | 312 | ## 313 | ## neo4j.backup -- If enabled is set to true, make sure and set the s3 path as well. 314 | ## 315 | backup: 316 | # neo4j.backup.enabled - Whether to include the backup neo4j cron pod. If set to true, s3Path is required. 317 | enabled: false 318 | ## 319 | ## neo4j.backup.s3Path -- The s3path to write to for backups. 320 | ## 321 | s3Path: "s3://dev/null" 322 | ## 323 | ## neo4j.backup.schedule -- The schedule to run backups on. Defaults to hourly. 324 | ## 325 | schedule: "0 * * * *" 326 | podAnnotations: {} 327 | 328 | ## 329 | ## neo4j.nodeSelector -- neo4j specific nodeSelector. 330 | ## 331 | nodeSelector: {} 332 | ## 333 | ## neo4j.affinity -- neo4j specific affinity. 334 | ## 335 | affinity: {} 336 | ## 337 | ## neo4j.tolerations -- neo4j specific tolerations. 338 | ## 339 | tolerations: [] 340 | ## 341 | ## neo4j.annotations -- neo4j service specific tolerations. 342 | ## 343 | annotations: {} 344 | ## 345 | ## neo4j.podAnnotations -- neo4j pod specific annotations. 346 | ## 347 | podAnnotations: {} 348 | 349 | ## 350 | ## Configuration related to elasticsearch. 351 | ## 352 | ## To add values to dependent charts, prefix the value with the chart name (e.g. elasticsearch) 353 | ## By default, the ES chart runs with 3,3,2 nodes for master, data, client. Amundsen likely does not need so much, 354 | ## so, this has been tuned down to 1,1,1. 355 | ## 356 | elasticsearch: 357 | # elasticsearch.enabled -- set this to false, if you want to provide your own ES instance. 
358 | enabled: true 359 | cluster: 360 | env: 361 | ## elasticsearch.cluster.env.MINIMUM_MASTER_NODES -- required to match master.replicas 362 | MINIMUM_MASTER_NODES: 1 363 | ## elasticsearch.cluster.env.EXPECTED_MASTER_NODES -- required to match master.replicas 364 | EXPECTED_MASTER_NODES: 1 365 | ## elasticsearch.cluster.env.RECOVER_AFTER_MASTER_NODES -- required to match master.replicas 366 | RECOVER_AFTER_MASTER_NODES: 1 367 | master: 368 | ## elasticsearch.master.replicas -- only running amundsen on 1 master replica 369 | replicas: 1 370 | data: 371 | ## elasticsearch.data.replicas -- only running amundsen on 1 data replica 372 | replicas: 1 373 | client: 374 | ## elasticsearch.client.replicas -- only running amundsen on 1 client replica 375 | replicas: 1 376 | # serviceType: LoadBalancer 377 | # serviceAnnotations: 378 | # external-dns.alpha.kubernetes.io/hostname: amundsen-elasticsearch.dev.teamname.company.com 379 | # service.beta.kubernetes.io/aws-load-balancer-internal: 0.0.0.0/0 380 | # service.beta.kubernetes.io/aws-load-balancer-type: nlb 381 | # nodeAffinity: high 382 | # resources: 383 | # limits: 384 | # cpu: 2 385 | # memory: 2Gi 386 | -------------------------------------------------------------------------------- /example/docker/neo4j/conf/neo4j.conf: -------------------------------------------------------------------------------- 1 | #***************************************************************** 2 | # Neo4j configuration 3 | # 4 | # For more details and a complete list of settings, please see 5 | # https://neo4j.com/docs/operations-manual/current/reference/configuration-settings/ 6 | #***************************************************************** 7 | 8 | # The name of the database to mount 9 | dbms.active_database=amundsen.db 10 | 11 | # Paths of directories in the installation. 12 | dbms.directories.data=/neo4j/data 13 | #dbms.directories.plugins=/var/lib/neo4j/plugins 14 | #dbms.directories.certificates=/var/lib/neo4j/certificates 15 | dbms.directories.logs=/var/log/neo4j 16 | #dbms.directories.lib=/usr/share/neo4j/lib 17 | #dbms.directories.run=/var/run/neo4j 18 | 19 | # This setting constrains all `LOAD CSV` import files to be under the `import` directory. Remove or comment it out to 20 | # allow files to be loaded from anywhere in the filesystem; this introduces possible security problems. See the 21 | # `LOAD CSV` section of the manual for details. 22 | dbms.directories.import=/var/lib/neo4j/import 23 | 24 | # Whether requests to Neo4j are authenticated. 25 | # To disable authentication, uncomment this line 26 | dbms.security.auth_enabled=false 27 | 28 | # Enable this to be able to upgrade a store from an older version. 29 | #dbms.allow_upgrade=true 30 | 31 | # Java Heap Size: by default the Java heap size is dynamically 32 | # calculated based on available system resources. 33 | # Uncomment these lines to set specific initial and maximum 34 | # heap size. 35 | #dbms.memory.heap.initial_size=512m 36 | #dbms.memory.heap.max_size=512m 37 | 38 | # The amount of memory to use for mapping the store files, in bytes (or 39 | # kilobytes with the 'k' suffix, megabytes with 'm' and gigabytes with 'g'). 40 | # If Neo4j is running on a dedicated server, then it is generally recommended 41 | # to leave about 2-4 gigabytes for the operating system, give the JVM enough 42 | # heap to hold all your transaction state and query context, and then leave the 43 | # rest for the page cache. 
44 | # The default page cache memory assumes the machine is dedicated to running 45 | # Neo4j, and is heuristically set to 50% of RAM minus the max Java heap size. 46 | #dbms.memory.pagecache.size=10g 47 | 48 | #***************************************************************** 49 | # Network connector configuration 50 | #***************************************************************** 51 | 52 | # With default configuration Neo4j only accepts local connections. 53 | # To accept non-local connections, uncomment this line: 54 | dbms.connectors.default_listen_address=0.0.0.0 55 | # You can also choose a specific network interface, and configure a non-default 56 | # port for each connector, by setting their individual listen_address. 57 | 58 | # The address at which this server can be reached by its clients. This may be the server's IP address or DNS name, or 59 | # it may be the address of a reverse proxy which sits in front of the server. This setting may be overridden for 60 | # individual connectors below. 61 | #dbms.connectors.default_advertised_address=localhost 62 | 63 | # You can also choose a specific advertised hostname or IP address, and 64 | # configure an advertised port for each connector, by setting their 65 | # individual advertised_address. 66 | 67 | # Bolt connector 68 | dbms.connector.bolt.enabled=true 69 | #dbms.connector.bolt.tls_level=OPTIONAL 70 | #dbms.connector.bolt.listen_address=:7687 71 | 72 | # HTTP Connector. There must be exactly one HTTP connector. 73 | dbms.connector.http.enabled=true 74 | #dbms.connector.http.listen_address=:7474 75 | 76 | # HTTPS Connector. There can be zero or one HTTPS connectors. 77 | dbms.connector.https.enabled=true 78 | #dbms.connector.https.listen_address=:7473 79 | 80 | # Number of Neo4j worker threads. 81 | #dbms.threads.worker_count= 82 | 83 | #***************************************************************** 84 | # SSL system configuration 85 | #***************************************************************** 86 | 87 | # Names of the SSL policies to be used for the respective components. 88 | 89 | # The legacy policy is a special policy which is not defined in 90 | # the policy configuration section, but rather derives from 91 | # dbms.directories.certificates and associated files 92 | # (by default: neo4j.key and neo4j.cert). Its use will be deprecated. 93 | 94 | # The policies to be used for connectors. 95 | # 96 | # N.B: Note that a connector must be configured to support/require 97 | # SSL/TLS for the policy to actually be utilized. 98 | # 99 | # see: dbms.connector.*.tls_level 100 | 101 | #bolt.ssl_policy=legacy 102 | #https.ssl_policy=legacy 103 | 104 | #***************************************************************** 105 | # SSL policy configuration 106 | #***************************************************************** 107 | 108 | # Each policy is configured under a separate namespace, e.g. 109 | # dbms.ssl.policy..* 110 | # 111 | # The example settings below are for a new policy named 'default'. 112 | 113 | # The base directory for cryptographic objects. Each policy will by 114 | # default look for its associated objects (keys, certificates, ...) 115 | # under the base directory. 116 | # 117 | # Every such setting can be overriden using a full path to 118 | # the respective object, but every policy will by default look 119 | # for cryptographic objects in its base location. 
120 | # 121 | # Mandatory setting 122 | 123 | #dbms.ssl.policy.default.base_directory=certificates/default 124 | 125 | # Allows the generation of a fresh private key and a self-signed 126 | # certificate if none are found in the expected locations. It is 127 | # recommended to turn this off again after keys have been generated. 128 | # 129 | # Keys should in general be generated and distributed offline 130 | # by a trusted certificate authority (CA) and not by utilizing 131 | # this mode. 132 | 133 | #dbms.ssl.policy.default.allow_key_generation=false 134 | 135 | # Enabling this makes it so that this policy ignores the contents 136 | # of the trusted_dir and simply resorts to trusting everything. 137 | # 138 | # Use of this mode is discouraged. It would offer encryption but no security. 139 | 140 | #dbms.ssl.policy.default.trust_all=false 141 | 142 | # The private key for the default SSL policy. By default a file 143 | # named private.key is expected under the base directory of the policy. 144 | # It is mandatory that a key can be found or generated. 145 | 146 | #dbms.ssl.policy.default.private_key= 147 | 148 | # The private key for the default SSL policy. By default a file 149 | # named public.crt is expected under the base directory of the policy. 150 | # It is mandatory that a certificate can be found or generated. 151 | 152 | #dbms.ssl.policy.default.public_certificate= 153 | 154 | # The certificates of trusted parties. By default a directory named 155 | # 'trusted' is expected under the base directory of the policy. It is 156 | # mandatory to create the directory so that it exists, because it cannot 157 | # be auto-created (for security purposes). 158 | # 159 | # To enforce client authentication client_auth must be set to 'require'! 160 | 161 | #dbms.ssl.policy.default.trusted_dir= 162 | 163 | # Client authentication setting. Values: none, optional, require 164 | # The default is to require client authentication. 165 | # 166 | # Servers are always authenticated unless explicitly overridden 167 | # using the trust_all setting. In a mutual authentication setup this 168 | # should be kept at the default of require and trusted certificates 169 | # must be installed in the trusted_dir. 170 | 171 | #dbms.ssl.policy.default.client_auth=require 172 | 173 | # A comma-separated list of allowed TLS versions. 174 | # By default TLSv1, TLSv1.1 and TLSv1.2 are allowed. 175 | 176 | #dbms.ssl.policy.default.tls_versions= 177 | 178 | # A comma-separated list of allowed ciphers. 179 | # The default ciphers are the defaults of the JVM platform. 180 | 181 | #dbms.ssl.policy.default.ciphers= 182 | 183 | #***************************************************************** 184 | # Logging configuration 185 | #***************************************************************** 186 | 187 | # To enable HTTP logging, uncomment this line 188 | #dbms.logs.http.enabled=true 189 | 190 | # Number of HTTP logs to keep. 191 | #dbms.logs.http.rotation.keep_number=5 192 | 193 | # Size of each HTTP log that is kept. 194 | #dbms.logs.http.rotation.size=20m 195 | 196 | # To enable GC Logging, uncomment this line 197 | #dbms.logs.gc.enabled=true 198 | 199 | # GC Logging Options 200 | # see http://docs.oracle.com/cd/E19957-01/819-0084-10/pt_tuningjava.html#wp57013 for more information. 201 | #dbms.logs.gc.options=-XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCApplicationStoppedTime -XX:+PrintPromotionFailure -XX:+PrintTenuringDistribution 202 | 203 | # Number of GC logs to keep. 
204 | #dbms.logs.gc.rotation.keep_number=5 205 | 206 | # Size of each GC log that is kept. 207 | #dbms.logs.gc.rotation.size=20m 208 | 209 | # Size threshold for rotation of the debug log. If set to zero then no rotation will occur. Accepts a binary suffix "k", 210 | # "m" or "g". 211 | #dbms.logs.debug.rotation.size=20m 212 | 213 | # Maximum number of history files for the internal log. 214 | #dbms.logs.debug.rotation.keep_number=7 215 | 216 | #***************************************************************** 217 | # Miscellaneous configuration 218 | #***************************************************************** 219 | 220 | # Enable this to specify a parser other than the default one. 221 | #cypher.default_language_version=3.0 222 | 223 | # Determines if Cypher will allow using file URLs when loading data using 224 | # `LOAD CSV`. Setting this value to `false` will cause Neo4j to fail `LOAD CSV` 225 | # clauses that load data from the file system. 226 | #dbms.security.allow_csv_import_from_file_urls=true 227 | 228 | # Retention policy for transaction logs needed to perform recovery and backups. 229 | dbms.tx_log.rotation.retention_policy=1 days 230 | 231 | # Enable a remote shell server which Neo4j Shell clients can log in to. 232 | dbms.shell.enabled=true 233 | # The network interface IP the shell will listen on (use 0.0.0.0 for all interfaces). 234 | #dbms.shell.host=127.0.0.1 235 | # The port the shell will listen on, default is 1337. 236 | #dbms.shell.port=1337 237 | 238 | # Only allow read operations from this Neo4j instance. This mode still requires 239 | # write access to the directory for lock purposes. 240 | #dbms.read_only=false 241 | 242 | # Comma separated list of JAX-RS packages containing JAX-RS resources, one 243 | # package name for each mountpoint. The listed package names will be loaded 244 | # under the mountpoints specified. Uncomment this line to mount the 245 | # org.neo4j.examples.server.unmanaged.HelloWorldResource.java from 246 | # neo4j-server-examples under /examples/unmanaged, resulting in a final URL of 247 | # http://localhost:7474/examples/unmanaged/helloworld/{nodeId} 248 | #dbms.unmanaged_extension_classes=org.neo4j.examples.server.unmanaged=/examples/unmanaged 249 | 250 | #******************************************************************** 251 | # JVM Parameters 252 | #******************************************************************** 253 | 254 | # G1GC generally strikes a good balance between throughput and tail 255 | # latency, without too much tuning. 256 | dbms.jvm.additional=-XX:+UseG1GC 257 | 258 | # Have common exceptions keep producing stack traces, so they can be 259 | # debugged regardless of how often logs are rotated. 260 | dbms.jvm.additional=-XX:-OmitStackTraceInFastThrow 261 | 262 | # Make sure that `initmemory` is not only allocated, but committed to 263 | # the process, before starting the database. This reduces memory 264 | # fragmentation, increasing the effectiveness of transparent huge 265 | # pages. It also reduces the possibility of seeing performance drop 266 | # due to heap-growing GC events, where a decrease in available page 267 | # cache leads to an increase in mean IO response time. 268 | # Try reducing the heap memory, if this flag degrades performance. 269 | dbms.jvm.additional=-XX:+AlwaysPreTouch 270 | 271 | # Trust that non-static final fields are really final. 272 | # This allows more optimizations and improves overall performance. 
273 | # NOTE: Disable this if you use embedded mode, or have extensions or dependencies that may use reflection or 274 | # serialization to change the value of final fields! 275 | dbms.jvm.additional=-XX:+UnlockExperimentalVMOptions 276 | dbms.jvm.additional=-XX:+TrustFinalNonStaticFields 277 | 278 | # Disable explicit garbage collection, which is occasionally invoked by the JDK itself. 279 | dbms.jvm.additional=-XX:+DisableExplicitGC 280 | 281 | # Remote JMX monitoring, uncomment and adjust the following lines as needed. Absolute paths to jmx.access and 282 | # jmx.password files are required. 283 | # Also make sure to update the jmx.access and jmx.password files with appropriate permission roles and passwords, 284 | # the shipped configuration contains only a read only role called 'monitor' with password 'Neo4j'. 285 | # For more details, see: http://download.oracle.com/javase/8/docs/technotes/guides/management/agent.html 286 | # On Unix based systems the jmx.password file needs to be owned by the user that will run the server, 287 | # and have permissions set to 0600. 288 | # For details on setting these file permissions on Windows see: 289 | # http://docs.oracle.com/javase/8/docs/technotes/guides/management/security-windows.html 290 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.port=3637 291 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.authenticate=true 292 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.ssl=false 293 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.password.file=/absolute/path/to/conf/jmx.password 294 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.access.file=/absolute/path/to/conf/jmx.access 295 | 296 | # Some systems cannot discover host name automatically, and need this line configured: 297 | #dbms.jvm.additional=-Djava.rmi.server.hostname=$THE_NEO4J_SERVER_HOSTNAME 298 | 299 | # Expand Diffie Hellman (DH) key size from default 1024 to 2048 for DH-RSA cipher suites used in server TLS handshakes. 300 | # This is to protect the server from any potential passive eavesdropping. 301 | dbms.jvm.additional=-Djdk.tls.ephemeralDHKeySize=2048 302 | 303 | # This mitigates a DDoS vector. 304 | dbms.jvm.additional=-Djdk.tls.rejectClientInitiatedRenegotiation=true 305 | 306 | #******************************************************************** 307 | # Wrapper Windows NT/2000/XP Service Properties 308 | #******************************************************************** 309 | # WARNING - Do not modify any of these properties when an application 310 | # using this configuration file has been installed as a service. 311 | # Please uninstall the service before modifying this section. The 312 | # service can then be reinstalled. 313 | 314 | # Name of the service 315 | dbms.windows_service_name=neo4j 316 | 317 | #******************************************************************** 318 | # Other Neo4j system properties 319 | #******************************************************************** 320 | dbms.jvm.additional=-Dunsupported.dbms.udc.source=debian 321 | 322 | 323 | # Allow Neo4j APOC to import and export for backup 324 | dbms.security.procedures.unrestricted=apoc.export.*,apoc.import.* 325 | apoc.export.file.enabled=true 326 | apoc.import.file.enabled=true --------------------------------------------------------------------------------