├── .flake8 ├── .gitattributes ├── .github └── workflows │ ├── jobber.yaml │ ├── linter.yaml │ ├── minesweeper.yaml │ ├── nbserve.yaml │ ├── notification.yaml │ ├── paws-hub.yaml │ ├── renderer.yaml │ ├── singleuser.yaml │ ├── tofu-fmt.yaml │ └── update-container-tags.yaml ├── .gitignore ├── .gitmodules ├── .yamllint.conf ├── LICENSE ├── README.md ├── ansible ├── ansible.cfg ├── files │ ├── csi-secret-cinderplugin.yaml.codfw1dev.crypt │ ├── csi-secret-cinderplugin.yaml.eqiad1.crypt │ └── sc.yaml ├── paws.yaml ├── templates │ └── prometheus-ingress.yaml.j2 └── vars │ ├── codfw1dev.yaml │ └── eqiad1.yaml ├── build.py ├── deploy.sh ├── images ├── jobber │ ├── Dockerfile │ └── README.md ├── minesweeper │ ├── Dockerfile │ ├── LICENSE │ ├── README.md │ ├── requirements.in │ └── requirements.txt ├── nbserve │ ├── Dockerfile │ ├── README.md │ └── robots.txt ├── paws-hub │ ├── Dockerfile │ ├── PAWS.svg │ ├── README.md │ └── paws-favicon.ico ├── renderer │ ├── Dockerfile │ ├── README.md │ ├── basic.tpl │ ├── full.tpl │ ├── renderer.py │ └── requirements.txt └── singleuser │ ├── Dockerfile │ ├── banner │ ├── hide_clusters_tab.css │ ├── install-extensions │ ├── install-julia │ ├── install-sql-tool │ ├── r │ ├── Rprofile.site │ ├── cran.gpg │ ├── cran.list │ └── rsession.conf │ ├── requirements.txt │ ├── user-config.py │ └── user-fixes.py ├── paws ├── .helmignore ├── Chart.yaml ├── codfw-secrets.yaml ├── codfw.yaml ├── files │ └── minesweeper │ │ ├── minesweeper.py │ │ └── secrets │ │ ├── ban.py │ │ ├── herorat.py │ │ └── minesweeper.yaml ├── production.yaml ├── secrets.yaml ├── templates │ ├── NOTES.txt │ ├── frontpage.yaml │ ├── localdev.yaml │ ├── minesweeper │ │ ├── configmap.yaml │ │ ├── daemonset.yaml │ │ └── rbac.yaml │ ├── nbserve │ │ └── nginx.yaml │ └── public.yaml └── values.yaml ├── pyproject.toml ├── secrets-codfw1dev.sh ├── secrets-eqiad1.sh ├── tests ├── helm-lint │ └── Dockerfile └── tox │ └── Dockerfile ├── tofu ├── 127a.tf ├── codfw1dev-backend.conf ├── 
eqiad1-backend.conf ├── main.tf ├── secrets.tf └── vars.tf ├── toolinfo.json ├── tox.ini └── tox_scripts └── yamllint.sh /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | per-file-ignores = 4 | images/nbserve/nginx.py: E501 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Directory entries are not enough to encrypt files beneath it 2 | # https://github.com/AGWA/git-crypt#gitattributes-file 3 | paws/secrets.yaml filter=git-crypt diff=git-crypt 4 | ansible/files/csi-secret-cinderplugin.yaml.eqiad1.crypt filter=git-crypt diff=git-crypt 5 | ansible/files/csi-secret-cinderplugin.yaml.codfw1dev.crypt filter=git-crypt diff=git-crypt 6 | paws/codfw-secrets.yaml filter=git-crypt diff=git-crypt 7 | paws/files/minesweeper/secrets/** filter=git-crypt diff=git-crypt 8 | tofu/secrets.tf filter=git-crypt diff=git-crypt 9 | secrets-eqiad1.sh filter=git-crypt diff=git-crypt 10 | secrets-codfw1dev.sh filter=git-crypt diff=git-crypt 11 | -------------------------------------------------------------------------------- /.github/workflows/jobber.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push jobber 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/jobber/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push jobber 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: jobber 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/linter.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: "linter" 4 | 
5 | 'on': 6 | pull_request: 7 | 8 | jobs: 9 | tox: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: build tox container 15 | run: | 16 | docker build -f tests/tox/Dockerfile -t tox:01 . 17 | - name: run tox container 18 | run: | 19 | docker run tox:01 20 | 21 | helm-lint: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v2 25 | 26 | - name: build helm-lint container 27 | run: | 28 | docker build -f tests/helm-lint/Dockerfile -t helm-lint:01 . 29 | - name: run helm-lint container 30 | run: | 31 | docker run helm-lint:01 32 | -------------------------------------------------------------------------------- /.github/workflows/minesweeper.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push minesweeper 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/minesweeper/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push minesweeper 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: minesweeper 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/nbserve.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push nbserve 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/nbserve/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push nbserve 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: nbserve 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/notification.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: "PR Notification" 4 | 5 | 'on': 6 | pull_request: 7 | types: [opened, closed] 8 | 9 | jobs: 10 | irc-notification: 11 | runs-on: ubuntu-latest 12 | name: Notify on IRC 13 | steps: 14 | - name: irc notification 15 | run: | 16 | sudo apt-get update 17 | sudo apt-get install netcat-traditional -y 18 | message="${{ github.actor }} ${{ github.event.action }} ${{ github.event.pull_request._links.html.href }}" 19 | echo -e "USER notefromgithub notefromgithub notefromgithub :notefromgithub\nNICK notefromgithub\nJOIN #wikimedia-cloud-feed\nPRIVMSG #wikimedia-cloud-feed :${message}\nQUIT\n" | nc irc.libera.chat 6667 20 | phabricator-comment: 21 | runs-on: ubuntu-latest 22 | name: Comment on Phabricator 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: post to phabricator 26 | run: | 27 | message="${{ github.actor }} ${{ github.event.action }} ${{ github.event.pull_request._links.html.href }}" 28 | echo "${message}" 29 | inital_commit=$(git log origin/main..origin/${{ github.head_ref }} --pretty=%H | tail -n1) 30 | task=$(curl ${{ github.event.pull_request._links.commits.href }} | jq .[0].commit.message -r | grep "^Bug: T[0-9]*$" | head -1 | awk '{print $2}') 31 | 32 | if [ -n "${task}" ]; then 33 | curl https://phabricator.wikimedia.org/api/maniphest.edit \ 34 | -d api.token=${{ secrets.TOOLFORGE_PHAB_BOT_KEY }} \ 35 | -d transactions[0][type]=comment \ 36 | -d transactions[0][value]="${message}" \ 37 | -d objectIdentifier=${task} 38 | fi 39 | -------------------------------------------------------------------------------- /.github/workflows/paws-hub.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push paws-hub 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/paws-hub/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push paws-hub 13 | uses: 
toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: paws-hub 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/renderer.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push renderer 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/renderer/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push renderer 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: renderer 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/singleuser.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push singleuser 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/singleuser/** 9 | 10 | 11 | jobs: 12 | build-and-push: 13 | name: build and push singleuser 14 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 15 | with: 16 | imagename: singleuser 17 | secrets: 18 | quay_user: ${{ secrets.QUAY_USER }} 19 | quay_password: ${{ secrets.QUAY_PASSWORD }} 20 | -------------------------------------------------------------------------------- /.github/workflows/tofu-fmt.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: "tofu fmt" 4 | 5 | 'on': 6 | pull_request: 7 | 8 | jobs: 9 | tf-fmt: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: install tofu 15 | run: | 16 | sudo apt-get update && sudo apt-get install -y gnupg software-properties-common 17 | curl -s 
https://packagecloud.io/install/repositories/opentofu/tofu/script.deb.sh?any=true -o /tmp/tofu-repository-setup.sh 18 | sudo bash /tmp/tofu-repository-setup.sh 19 | sudo apt install tofu 20 | - name: tofu fmt 21 | run: | 22 | shopt -s extglob 23 | cd tofu 24 | if ! tofu fmt -check -diff !(secrets).tf ; then 25 | echo "please update your tofu code to match the above."; 26 | echo 'or run `tofu fmt` to have tofu reformat it.'; 27 | exit 1; 28 | fi 29 | -------------------------------------------------------------------------------- /.github/workflows/update-container-tags.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: update container tags 4 | 5 | 'on': 6 | pull_request_target: 7 | 8 | jobs: 9 | update-container-tags: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | with: 14 | token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 15 | repository: ${{ github.event.pull_request.head.repo.full_name }} 16 | 17 | - name: git fetch 18 | run: | 19 | git fetch 20 | - name: git checkout 21 | run: | 22 | git checkout ${{ github.head_ref }} 23 | 24 | # compare to upstream main branch 25 | - name: git add remote 26 | run: | 27 | git remote add toolforgepaws https://github.com/toolforge/paws.git 28 | - name: git remote update 29 | run: | 30 | git remote update 31 | 32 | - name: update values.yaml 33 | run: | 34 | for i in renderer nbserve paws-hub jobber singleuser minesweeper ; do 35 | if [[ $(git diff remotes/toolforgepaws/main -- images/${i}/) ]]; then 36 | sed -i "s/tag: .* # ${i} tag managed by github actions$/tag: pr-${{ github.event.number }} # ${i} tag managed by github actions/" paws/values.yaml 37 | fi 38 | done 39 | 40 | - uses: EndBug/add-and-commit@v7 41 | with: 42 | add: 'paws/values.yaml' 43 | author_name: Github Action 44 | author_email: auto@github.com 45 | branch: ${{ github.head_ref }} 46 | message: 'auto update of ${{ inputs.imagename }} tag' 47 | pull: --rebase --autostash 48 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .vscode/ 3 | __pycache__/ 4 | *.pyc 5 | .mypy_cache/ 6 | charts/ 7 | requirements.lock 8 | Chart.lock 9 | dev-values.yaml 10 | *.swp 11 | terraform.tfstate* 12 | .terraform* 13 | tofu/kube.config 14 | .venv* 15 | ansible/collections/* 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/.gitmodules -------------------------------------------------------------------------------- /.yamllint.conf: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | yaml-files: 4 | - '*.yaml' 5 | - '*.yml' 6 | - '.yamllint' 7 | 8 | rules: 9 | # with all the templates braces get weird 10 | braces: disable 11 | brackets: enable 12 | colons: enable 13 | commas: enable 14 | comments: 15 | level: warning 16 | min-spaces-from-content: 1 17 | comments-indentation: 18 | level: warning 19 | document-end: disable 20 | document-start: 21 | level: warning 22 | empty-lines: enable 23 | empty-values: disable 24 | hyphens: enable 25 | indentation: enable 26 | key-duplicates: enable 27 | key-ordering: disable 28 | line-length: disable 29 | new-line-at-end-of-file: enable 30 | new-lines: enable 31 | octal-values: disable 32 | quoted-strings: disable 33 | trailing-spaces: enable 34 | truthy: 35 | level: warning 36 | 37 | # don't try to parse encrypted files. 
Gives error like: 38 | # UnicodeDecodeError: 'utf-8' codec can't decode byte 0x99 in position 10: invalid start byte 39 | ignore: | 40 | paws/secrets.yaml 41 | paws/codfw-secrets.yaml 42 | paws/files/minesweeper/secrets/ 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Yuvi Panda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PAWS 2 | 3 | PAWS: A Web Shell (PAWS) is a Jupyter notebooks deployment that has been customized to make interacting with Wikimedia wikis easier. 
It allows users to create and share documents that contain live code, visualizations such as graphs, rich text, etc. The user created notebooks are a powerful tool that enables data analysis and scientific research, and also transforms the way in which programmers write code - by enabling an exploratory environment with a quick feedback loop, and a low barrier for entry through its easy-to-use graphical interface. 4 | 5 | ## Contributing 6 | 7 | Bugs, issues and feature requests are found on [Wikimedia Foundation's Phabricator](https://phabricator.wikimedia.org/). 8 | There is a [workboard](https://phabricator.wikimedia.org/project/view/1648/) and a project tag of `#paws` to use for related work. You can reference code and commits from this repo at the Phabricator mirror of the code [here](https://phabricator.wikimedia.org/diffusion/PAWS/browse/main/), but please do not clone or try to use that mirror directly. 9 | 10 | To contribute to this project's code, please fork the repo on [GitHub](https://github.com/toolforge/paws/) and submit a pull request. 11 | 12 | If you have push access to the project, we ask that new changes be reviewed by one other 13 | project member by using either a feature branch on the https://github.com/toolforge/paws repo 14 | to trigger a pull request or using a fork to set up a pull request. 15 | 16 | ### Pull Requests and CI 17 | 18 | When a pull request is opened a few things are run automatically. Any container that was modified in /images will be built and pushed to quay.io. Your branch will be updated with an additional commit, updating the values.yaml file to point to the new image tags. And a linter will be run. These workflows, and their status, will be visible in the github pull request page. At this point you, or anyone else, will be able to pull down the branch in the PR and run it locally in minikube as described below. 
19 | 20 | If your PR originates from a fork, please be sure "Allow edits and access to secrets by maintainers" is enabled such that the CI can function. Alternatively please manually edit the values.yaml to match the PR number for any containers that your code updates. 21 | 22 | ### Setting up a development environment 23 | 24 | It is possible to run a fully-functioning PAWS system inside [minikube](https://minikube.sigs.k8s.io/docs/)! You don't need 25 | access to the secrets.yaml file to do it either, since the defaults mostly support it. 26 | 27 | You will need to install minikube (tested on minikube v1.33.1) and [helm](https://helm.sh) and kubectl on your system. When you are confident those are working, start minikube with: 28 | - `minikube start --kubernetes-version=v1.27.8` 29 | - `minikube addons enable ingress` 30 | (from the top level of this repo): 31 | install the dependencies for the PAWS dev environment with these steps: 32 | - `helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/` 33 | - `helm dep up paws/` 34 | - `kubectl create namespace paws-dev` 35 | - `helm -n paws-dev upgrade --install dev paws/ --timeout=50m` 36 | 37 | The rest of the setup instructions will display on screen as long as the install is successful. 38 | Please refer to the helm documentation from there. 39 | 40 | If you are experiencing issues with the installation, you can try changing the driver configuration in minikube: https://minikube.sigs.k8s.io/docs/drivers/ 41 | 42 | - First delete the current cluster: 43 | 44 | `minikube delete` 45 | 46 | - Start a new cluster with the driver you want to use (e.g. 
docker, virtualbox, hyperkit, etc.): 47 | 48 | `minikube start --driver=docker --kubernetes-version=v1.20.11` 49 | 50 | Another possible solution if minikube is acting weird might be to upgrade minikube, or even to 51 | increase the default memory: 52 | 53 | `minikube config set memory 4096` 54 | 55 | #### Working with images 56 | Keep in mind that opening a PR will, attempt to, build any image that has changed in the PR branch. This method is fine to build and test the resulting container. Though it is often easier to build, and rebuild, a container locally for testing. The following describes how to build and use a container locally. 57 | 58 | There are 8 images that are part of PAWS, in particular in the images/ directory. If you start a dev environment, it will pull those images from quay.io by default, just like in Wikimedia Cloud Services. If you are making changes to the images and testing those locally, you'll need to build them and tag them for your local environment, possibly setting them in your local values file with the tags you set. 59 | 60 | If you are using minikube, you need to make sure you are using minikube's docker, not your system's docker with `eval $(minikube docker-env)`. Now your docker commands will operate on the minikube environment. 61 | 62 | For example, let's say you wanted to update the singleuser image (which is the actual notebook server image): 63 | - `cd images/singleuser` 64 | - `docker build -t tag-you-are-going-to-use:whatever .` 65 | 66 | And then you should have the image with a tag of `tag-you-are-going-to-use:whatever` that you could edit into your values.yaml file for local helm work. 67 | ## Useful libraries 68 | ### Accessing Database Replicas With Pandas and Sqlalchemy 69 | 70 | Pandas is a lovely high level library for in-memory data manipulations. 
In order to get the result of a SQL query as a pandas dataframe use: 71 | ``` 72 | from sqlalchemy import create_engine 73 | import sys, os 74 | import pandas as pd 75 | 76 | constr = 'mysql+pymysql://{user}:{pwd}@{host}'.format(user=os.environ['MYSQL_USERNAME'], 77 | pwd=os.environ['MYSQL_PASSWORD'], 78 | host=os.environ['MYSQL_HOST']) 79 | con = create_engine(constr) 80 | 81 | df = pd.read_sql('select * from plwiki_p.logging limit 10', con) 82 | ``` 83 | 84 | ### Storage space 85 | Publishing space 86 | 87 | A notebook can be turned into a public notebook by publishing a link to it. This works as the notebook is made available in a read only mode. An example might be …revisions-sql.ipynb?kernel_name=python3. It could be wise to add the kernel name to the link, even if it isn't necessary in some cases. 88 | 89 | If you want to run the copy yourself, or do interactive changes, you must download the notebook and reupload on your own account. Downloading the raw format of the previous example can be done by adding format=raw to the previous example …revisions-sql.ipynb?format=raw. This download-reupload-process is somewhat awkward. 90 | 91 | Note that a notebook will always be published, as the link can be guessed, so don't add any private information. 92 | 93 | ### Running tests locally 94 | PAWS tests are managed through docker. You can find all of the tests in the tests directory. If you want to build and run them locally run: 95 | `docker build -f tests/<test-dir>/Dockerfile -t test:01 . ; docker run test:01` 96 | Where `<test-dir>` is any of the directories in tests. For example: 97 | `docker build -f tests/tox/Dockerfile -t test:01 . ; docker run test:01` 98 | Would run tox. 
99 | 100 | ### To know more about PAWS have a look at: 101 | https://wikitech.wikimedia.org/wiki/PAWS 102 | 103 | ### Comment to Phabricator 104 | To have a PR make comments to an associated phabricator ticket have the last line of the commit look like: 105 | 106 | Bug: 107 | 108 | For example: 109 | Bug: T318182 110 | 111 | ### Deployment ### 112 | ``` 113 | bash deploy.sh 114 | ``` 115 | 116 | update the web proxy in horizon to point to current cluster. 117 | 118 | https://wikitech.wikimedia.org/wiki/PAWS/Admin#Deployment 119 | 120 | #### Disaster Recovery 121 | If the entire project is removed two parts of paws are not managed by tofu/ansible. 122 | Object storage container: An object storage container named "tofu-state" will need to be generated in horizon. This is where the state file for tofu resides. 123 | NFS: The NFS server is not included. And a fresh NFS server will be needed for paws to operate. 124 | 125 | # backup prometheus 126 | see ansible/files/prometheus-data.sh for example of backup/restore 127 | -------------------------------------------------------------------------------- /ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | # Better error output 3 | stdout_callback=debug 4 | stderr_callback=debug 5 | 6 | collections_path=./collections/ansible_collections 7 | 8 | # we're only using localhost, no need for the warning. 
9 | localhost_warning=False 10 | 11 | [inventory] 12 | # Only using localhost, so no inventory 13 | inventory_unparsed_warning=False 14 | -------------------------------------------------------------------------------- /ansible/files/csi-secret-cinderplugin.yaml.codfw1dev.crypt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/ansible/files/csi-secret-cinderplugin.yaml.codfw1dev.crypt -------------------------------------------------------------------------------- /ansible/files/csi-secret-cinderplugin.yaml.eqiad1.crypt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/ansible/files/csi-secret-cinderplugin.yaml.eqiad1.crypt -------------------------------------------------------------------------------- /ansible/files/sc.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: storage.k8s.io/v1 4 | kind: StorageClass 5 | metadata: 6 | name: standard 7 | annotations: 8 | storageclass.kubernetes.io/is-default-class: "true" 9 | provisioner: cinder.csi.openstack.org 10 | parameters: 11 | availability: nova 12 | -------------------------------------------------------------------------------- /ansible/paws.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Deploy PAWS to k8s cluster 4 | gather_facts: false 5 | hosts: localhost 6 | tasks: 7 | - name: include env vars 8 | include_vars: vars/{{ datacenter }}.yaml 9 | 10 | - name: Temporary directory for cloud provider 11 | ansible.builtin.tempfile: 12 | state: directory 13 | suffix: paws 14 | register: paws_dir 15 | changed_when: False 16 | 17 | - name: Clone cloud-provider-openstack 18 | ansible.builtin.git: 19 | repo: 
https://github.com/kubernetes/cloud-provider-openstack.git 20 | dest: "{{ paws_dir.path }}" 21 | version: v1.26.3 22 | changed_when: False 23 | 24 | - name: Copy file with owner and permissions 25 | ansible.builtin.copy: 26 | src: files/csi-secret-cinderplugin.yaml.{{ datacenter }}.crypt 27 | dest: "{{ paws_dir.path }}/manifests/cinder-csi-plugin/csi-secret-cinderplugin.yaml" 28 | changed_when: False 29 | 30 | - name: install the things! 31 | kubernetes.core.k8s: 32 | state: present 33 | src: "{{ paws_dir.path }}/manifests/cinder-csi-plugin/{{ item }}" 34 | loop: 35 | - "csi-secret-cinderplugin.yaml" 36 | - "cinder-csi-controllerplugin-rbac.yaml" 37 | - "cinder-csi-controllerplugin.yaml" 38 | - "cinder-csi-nodeplugin-rbac.yaml" 39 | - "cinder-csi-nodeplugin.yaml" 40 | - "csi-cinder-driver.yaml" 41 | 42 | - name: Delete temporary directory 43 | ansible.builtin.file: 44 | state: absent 45 | path: "{{ paws_dir.path }}" 46 | changed_when: False 47 | 48 | - name: and sc.yaml 49 | kubernetes.core.k8s: 50 | state: present 51 | src: "files/sc.yaml" 52 | 53 | - name: Deploy ingress-nginx 54 | kubernetes.core.helm: 55 | name: ingress-nginx 56 | chart_ref: ingress-nginx 57 | chart_repo_url: https://kubernetes.github.io/ingress-nginx 58 | chart_version: 4.12.1 59 | release_namespace: ingress-nginx 60 | create_namespace: true 61 | set_values: 62 | - value: controller.service.type=NodePort 63 | value_type: string 64 | - value: controller.service.enableHttps=false 65 | value_type: string 66 | - value: controller.service.nodePorts.http=30001 67 | value_type: string 68 | - value: controller.config.proxy-body-size=4m 69 | value_type: string 70 | - value: controller.config.allow-snippet-annotations=true 71 | value_type: string 72 | 73 | - name: Add jupyterhub chart repo 74 | kubernetes.core.helm_repository: 75 | name: jupyterhub 76 | repo_url: "https://jupyterhub.github.io/helm-chart/" 77 | 78 | - name: Deploy paws 79 | kubernetes.core.helm: 80 | name: paws 81 | chart_ref: ../paws 82 
| release_namespace: "{{ namespace }}" 83 | create_namespace: true 84 | values_files: 85 | - ../paws/{{ secret_file }}.yaml 86 | - ../paws/{{ env_file }}.yaml 87 | timeout: '50m' 88 | dependency_update: true 89 | 90 | - name: Create metrics namespace for prometheus 91 | kubernetes.core.k8s: 92 | name: metrics 93 | kind: Namespace 94 | state: present 95 | 96 | - name: Prometheus 97 | kubernetes.core.helm: 98 | name: prometheus 99 | chart_ref: prometheus 100 | chart_repo_url: https://prometheus-community.github.io/helm-charts 101 | chart_version: 25.26.0 102 | release_namespace: metrics 103 | create_namespace: true 104 | set_values: 105 | - value: prometheus.retention=30d 106 | value_type: string 107 | 108 | - name: Ingress for prometheus 109 | kubernetes.core.k8s: 110 | state: present 111 | template: "templates/prometheus-ingress.yaml.j2" 112 | namespace: metrics 113 | -------------------------------------------------------------------------------- /ansible/templates/prometheus-ingress.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: prometheus 5 | spec: 6 | ingressClassName: nginx 7 | rules: 8 | {% if datacenter == 'codfw1dev' %} 9 | - host: prometheus-paws.codfw1dev.wmcloud.org 10 | {% else %} 11 | - host: prometheus-paws.wmcloud.org 12 | {% endif %} 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: prometheus-server 18 | port: 19 | number: 80 20 | path: / 21 | pathType: Prefix 22 | -------------------------------------------------------------------------------- /ansible/vars/codfw1dev.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | secret_file: 'codfw-secrets' 4 | namespace: 'codfw1dev' 5 | env_file: 'codfw' 6 | -------------------------------------------------------------------------------- /ansible/vars/eqiad1.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | 3 | secret_file: 'secrets' 4 | namespace: 'prod' 5 | env_file: 'production' 6 | -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import os 3 | import argparse 4 | import subprocess 5 | 6 | 7 | def last_git_modified(path, n=1): 8 | return ( 9 | subprocess.check_output( 10 | ["git", "log", "-n", str(n), "--pretty=format:%h", path] 11 | ) 12 | .decode("utf-8") 13 | .split("\n")[-1] 14 | ) 15 | 16 | 17 | def image_touched(image, commit_range): 18 | return ( 19 | subprocess.check_output( 20 | [ 21 | "git", 22 | "diff", 23 | "--name-only", 24 | commit_range, 25 | os.path.join("images", image), 26 | ] 27 | ) 28 | .decode("utf-8") 29 | .strip() 30 | != "" 31 | ) 32 | 33 | 34 | def build_images(prefix, images, commit_range=None, push=False): 35 | for image in images: 36 | if commit_range: 37 | if not image_touched(image, commit_range): 38 | print( 39 | "Skipping {}, not touched in {}".format(image, commit_range) 40 | ) 41 | continue 42 | 43 | # Pull last available version of image to maximize cache use 44 | try_count = 0 45 | while try_count < 50: 46 | last_image_tag = last_git_modified( 47 | os.path.join("images", image), try_count + 2 48 | ) 49 | last_image_spec = "{}{}:{}".format(prefix, image, last_image_tag) 50 | try: 51 | subprocess.check_call(["docker", "pull", last_image_spec]) 52 | break 53 | except subprocess.CalledProcessError: 54 | try_count += 1 55 | pass 56 | image_path = os.path.join("images", image) 57 | tag = last_git_modified(image_path) 58 | image_spec = "{}{}:{}".format(prefix, image, tag) 59 | 60 | subprocess.check_call( 61 | [ 62 | "docker", 63 | "build", 64 | "-t", 65 | image_spec, 66 | "--cache-from", 67 | last_image_spec, 68 | image_path, 69 | ] 70 | ) 71 | if push: 72 | subprocess.check_call(["docker", 
"push", image_spec]) 73 | 74 | 75 | def deploy(prefix, images, release, install): 76 | image_map = { 77 | "paws-hub": "jupyterhub.hub.image", 78 | "singleuser": "jupyterhub.singleuser.image", 79 | } 80 | 81 | args = [] 82 | 83 | # Set up helm! 84 | subprocess.check_call(["helm", "init", "--client-only"]) 85 | subprocess.check_call( 86 | [ 87 | "helm", 88 | "repo", 89 | "add", 90 | "jupyterhub", 91 | "https://jupyterhub.github.io/helm-chart", 92 | ] 93 | ) 94 | subprocess.check_call(["helm", "dep", "up"], cwd="paws") 95 | 96 | for image in images: 97 | image_path = os.path.join("images", image) 98 | image_name = prefix + image 99 | tag = last_git_modified(image_path) 100 | args.append("--set={}.name={}".format(image_map[image], image_name)) 101 | args.append("--set={}.tag={}".format(image_map[image], tag)) 102 | 103 | if install: 104 | helm = [ 105 | "helm", 106 | "install", 107 | "--name", 108 | release, 109 | "--namespace", 110 | release, 111 | "paws/", 112 | "-f", 113 | "paws/secrets.yaml", 114 | ] 115 | else: 116 | helm = ["helm", "upgrade", release, "paws/", "-f", "paws/secrets.yaml"] 117 | 118 | subprocess.check_call(helm + args) 119 | 120 | 121 | def main(): 122 | argparser = argparse.ArgumentParser() 123 | argparser.add_argument( 124 | "--image-prefix", default="quay.io/wikimedia-paws-prod/" 125 | ) 126 | subparsers = argparser.add_subparsers(dest="action") 127 | 128 | build_parser = subparsers.add_parser( 129 | "build", description="Build & Push images" 130 | ) 131 | build_parser.add_argument( 132 | "--commit-range", 133 | help="Range of commits to consider when building images", 134 | ) 135 | build_parser.add_argument("--push", action="store_true") 136 | 137 | deploy_parser = subparsers.add_parser( 138 | "deploy", description="Deploy with helm" 139 | ) 140 | deploy_parser.add_argument("release", default="prod") 141 | deploy_parser.add_argument("--install", action="store_true") 142 | 143 | args = argparser.parse_args() 144 | 145 | images = ["paws-hub", 
#!/bin/bash

# Deploy PAWS to a datacenter: tofu (infra) first, then ansible/helm.
# Usage: ./deploy.sh <eqiad1|codfw1dev> [tofu]
#   passing "tofu" as the second argument stops after the tofu apply.

set -e

if [ "${1}" = 'eqiad1' ] || [ "${1}" = 'codfw1dev' ]
then
    datacenter="${1}"
else
    echo "Please enter datacenter."
    echo "Usage:"
    # the old message printed no placeholder at all
    echo "${0} <eqiad1|codfw1dev> [tofu]"
    # exit non-zero so callers can detect the usage error
    exit 1
fi

if [ "${2}" = 'tofu' ]
then
    # exit after tofu
    tofuonly=1
fi


if ! command -v kubectl ; then
    echo "please install kubectl"
    exit 1
fi

if ! command -v helm ; then
    echo "please install helm"
    exit 1
fi

if ! command -v tofu ; then
    echo "please install tofu"
    exit 1
fi

# provides ACCESS_KEY / SECRET_KEY for the tofu S3-compatible backend
source "secrets-${datacenter}.sh"

python3 -m venv .venv/deploy
source .venv/deploy/bin/activate
pip install ansible==10.3.0 kubernetes==26.1.0
# install helm diff. Needed to keep helm module idempotent
helm plugin install https://github.com/databus23/helm-diff || true

cd tofu
AWS_ACCESS_KEY_ID=${ACCESS_KEY} AWS_SECRET_ACCESS_KEY=${SECRET_KEY} tofu init -backend-config="${datacenter}-backend.conf"
AWS_ACCESS_KEY_ID=${ACCESS_KEY} AWS_SECRET_ACCESS_KEY=${SECRET_KEY} tofu apply -var datacenter="${datacenter}"
KUBECONFIG="$(pwd)/kube.config"
export KUBECONFIG

if [ "${tofuonly}" = '1' ]
then
    exit 0
fi

cd ../ansible
# install collections here to take advantage of ansible.cfg configs
ansible-galaxy collection install -U kubernetes.core -p ./collections

ansible-playbook paws.yaml --extra-vars "datacenter=${datacenter}"
9 | ENV PYTHONUNBUFFERED=1 10 | -------------------------------------------------------------------------------- /images/minesweeper/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, JupyterHub 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /images/minesweeper/README.md: -------------------------------------------------------------------------------- 1 | # minesweeper docker image 2 | 3 | docker image with basic dependencies for admin tasks on a kubernetes cluster 4 | (ps, python, python-psutil, python-kubernetes) 5 | -------------------------------------------------------------------------------- /images/minesweeper/requirements.in: -------------------------------------------------------------------------------- 1 | kubernetes 2 | -------------------------------------------------------------------------------- /images/minesweeper/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=./requirements.txt ./requirements.in 6 | # 7 | cachetools==5.3.1 8 | # via google-auth 9 | certifi==2024.7.4 10 | # via 11 | # kubernetes 12 | # requests 13 | charset-normalizer==3.2.0 14 | # via requests 15 | google-auth==2.22.0 16 | # via kubernetes 17 | idna==3.7 18 | # via requests 19 | kubernetes==27.2.0 20 | # via -r requirements.in 21 | oauthlib==3.2.2 22 | # via 23 | # kubernetes 24 | # requests-oauthlib 25 | pyasn1==0.5.0 26 | # via 27 | # pyasn1-modules 28 | # rsa 29 | pyasn1-modules==0.3.0 30 | # via google-auth 31 | python-dateutil==2.8.2 32 | # via kubernetes 33 | pyyaml==6.0.1 34 | # via kubernetes 35 | requests==2.32.0 36 | # via 37 | # kubernetes 38 | # requests-oauthlib 39 | requests-oauthlib==1.3.1 40 | # via kubernetes 41 | rsa==4.9 42 | # via google-auth 43 | six==1.16.0 44 | # via 45 | # google-auth 46 | # kubernetes 47 | # python-dateutil 48 | urllib3==1.26.19 49 | # via 50 | # google-auth 51 | # kubernetes 52 | # requests 53 | websocket-client==1.6.1 54 | # via kubernetes 55 | 
-------------------------------------------------------------------------------- /images/nbserve/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update \ 5 | && apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | curl \ 8 | libgd-dev \ 9 | libpcre3-dev \ 10 | libssl-dev \ 11 | luarocks \ 12 | make \ 13 | perl \ 14 | unzip \ 15 | ca-certificates \ 16 | git \ 17 | libxml2-dev \ 18 | libxslt1-dev \ 19 | python3 \ 20 | && apt-get clean \ 21 | && rm -rf /var/lib/apt/lists/* 22 | 23 | ARG RESTY_VERSION="1.25.3.1" 24 | ARG RESTY_J="1" 25 | # ARG RESTY_OPENSSL_VERSION="1.1.1d" 26 | ARG RESTY_CONFIG_OPTIONS="\ 27 | --with-compat \ 28 | --with-file-aio \ 29 | --with-http_addition_module \ 30 | --with-http_auth_request_module \ 31 | --with-http_dav_module \ 32 | --with-http_flv_module \ 33 | --with-http_gunzip_module \ 34 | --with-http_gzip_static_module \ 35 | --with-http_image_filter_module=dynamic \ 36 | --with-http_mp4_module \ 37 | --with-http_random_index_module \ 38 | --with-http_realip_module \ 39 | --with-http_secure_link_module \ 40 | --with-http_slice_module \ 41 | --with-http_ssl_module \ 42 | --with-http_stub_status_module \ 43 | --with-http_sub_module \ 44 | --with-http_v2_module \ 45 | --with-http_xslt_module=dynamic \ 46 | --with-ipv6 \ 47 | --with-mail \ 48 | --with-mail_ssl_module \ 49 | --with-md5-asm \ 50 | --with-pcre-jit \ 51 | --with-sha1-asm \ 52 | --with-stream \ 53 | --with-stream_ssl_module \ 54 | --with-threads \ 55 | --add-module=./ngx-fancyindex \ 56 | " 57 | 58 | RUN cd /tmp \ 59 | && curl -fSL https://openresty.org/download/openresty-${RESTY_VERSION}.tar.gz -o openresty-${RESTY_VERSION}.tar.gz \ 60 | && tar xzf openresty-${RESTY_VERSION}.tar.gz \ 61 | && cd /tmp/openresty-${RESTY_VERSION} \ 62 | && git clone https://github.com/aperezdc/ngx-fancyindex \ 63 | && eval ./configure -j${RESTY_J} 
${RESTY_CONFIG_OPTIONS} \ 64 | && make -j${RESTY_J} \ 65 | && make -j${RESTY_J} install \ 66 | && cd /tmp \ 67 | && rm -rf \ 68 | openresty-${RESTY_VERSION}.tar.gz openresty-${RESTY_VERSION} \ 69 | && mkdir -p /var/run/openresty 70 | 71 | RUN ln -sf /dev/stdout /usr/local/openresty/nginx/logs/access.log 72 | RUN ln -sf /dev/stderr /usr/local/openresty/nginx/logs/error.log 73 | 74 | # Add additional binaries into PATH for convenience 75 | ENV PATH=$PATH:/usr/local/openresty/luajit/bin:/usr/local/openresty/nginx/sbin:/usr/local/openresty/bin 76 | 77 | RUN luarocks install lua-resty-http 78 | RUN luarocks install lua-cjson 79 | 80 | RUN apt-get clean \ 81 | && rm -rf /var/lib/apt/lists/* 82 | 83 | EXPOSE 8000 84 | 85 | ADD robots.txt /var/www/robots.txt 86 | 87 | CMD ["/usr/local/openresty/bin/openresty", "-c", "/mnt/nginx.conf"] 88 | 89 | # Use SIGQUIT instead of default SIGTERM to cleanly drain requests 90 | # See https://github.com/openresty/docker-openresty/blob/master/README.md#tips--pitfalls 91 | STOPSIGNAL SIGQUIT 92 | -------------------------------------------------------------------------------- /images/nbserve/README.md: -------------------------------------------------------------------------------- 1 | This container is used for managing the public-paws.wmcloud.org traffic. So public viewing of notebooks and files. 2 | 3 | It also redirects files to the renderer container to be built. Also for public viewing. 
4 | -------------------------------------------------------------------------------- /images/nbserve/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /images/paws-hub/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jupyterhub/k8s-hub:3.3.8 2 | ARG NB_USER=tools.paws 3 | ARG NB_UID=52771 4 | ARG HOME=/home/paws 5 | USER root 6 | RUN adduser --disabled-password \ 7 | --gecos "Default user" \ 8 | --uid ${NB_UID} \ 9 | --home ${HOME} \ 10 | --force-badname \ 11 | ${NB_USER} 12 | 13 | COPY PAWS.svg /srv/jupyterhub 14 | COPY paws-favicon.ico /usr/local/share/jupyterhub/static/favicon.ico 15 | 16 | RUN chown -R ${NB_USER}:${NB_USER} /srv/jupyterhub 17 | USER ${NB_USER} 18 | 19 | CMD ["jupyterhub", "--config", "/srv/jupyterhub_config.py"] 20 | -------------------------------------------------------------------------------- /images/paws-hub/PAWS.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 9 | 10 | 11 | 12 | 13 | 14 | 16 | 28 | 29 | 32 | 33 | 34 | 35 | 36 | 39 | 41 | 43 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /images/paws-hub/README.md: -------------------------------------------------------------------------------- 1 | This is the jupyterhub image with a few branding changes for PAWS. 2 | 3 | The image is built for production by a github action. It can be built locally with: 4 | docker build . 5 | from this directory. 6 | 7 | The image itself would be deployed into PAWS through helm. Defined in the values.yaml file under the jupyterhub.hub definition. 
8 | -------------------------------------------------------------------------------- /images/paws-hub/paws-favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/images/paws-hub/paws-favicon.ico -------------------------------------------------------------------------------- /images/renderer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | python3-pip \ 6 | python3-setuptools \ 7 | python3-wheel \ 8 | uwsgi \ 9 | uwsgi-plugin-python3 \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | COPY requirements.txt /tmp/requirements.txt 14 | 15 | # We don't care about breaking system packages since this is a container 16 | RUN pip3 --no-cache-dir install --break-system-packages -r /tmp/requirements.txt 17 | 18 | # something about the --no-cache-dir keeps pyrsistent from visibly installing 19 | RUN pip3 install --break-system-packages pyrsistent 20 | 21 | COPY renderer.py /srv/renderer.py 22 | COPY basic.tpl /srv/basic.tpl 23 | COPY full.tpl /srv/full.tpl 24 | 25 | WORKDIR /srv 26 | 27 | CMD /usr/bin/uwsgi \ 28 | --plugins python3 \ 29 | --socket 0.0.0.0:8000 \ 30 | --wsgi-file /srv/renderer.py \ 31 | --master \ 32 | --processes 4 \ 33 | --die-on-term 34 | -------------------------------------------------------------------------------- /images/renderer/README.md: -------------------------------------------------------------------------------- 1 | This container manages the rendering of several file types. 
rst, md, and ipynb 2 | 3 | The files are sent to this container from the nbserve container here: 4 | https://github.com/toolforge/paws/blob/41f03a544041318f1fad479b32ae46ac9e816a55/images/nbserve/nginx.py#L101 5 | -------------------------------------------------------------------------------- /images/renderer/basic.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'display_priority.tpl' -%} 2 | 3 | 4 | {% block codecell %} 5 |
6 | {{ super() }} 7 |
8 | {%- endblock codecell %} 9 | 10 | {% block input_group -%} 11 |
12 | {{ super() }} 13 |
14 | {% endblock input_group %} 15 | 16 | {% block output_group %} 17 |
18 |
19 | {{ super() }} 20 |
21 |
22 | {% endblock output_group %} 23 | 24 | {% block in_prompt -%} 25 | {%- endblock in_prompt %} 26 | 27 | {% block empty_in_prompt -%} 28 | {%- endblock empty_in_prompt %} 29 | 30 | {# 31 | output_prompt doesn't do anything in HTML, 32 | because there is a prompt div in each output area (see output block) 33 | #} 34 | {% block output_prompt %} 35 | {% endblock output_prompt %} 36 | 37 | {% block input %} 38 |
39 |
40 | {{ cell.source | highlight_code(metadata=cell.metadata) }} 41 |
42 |
43 | {%- endblock input %} 44 | 45 | {% block output %} 46 |
47 | {{ super() }} 48 |
49 | {% endblock output %} 50 | 51 | {% block markdowncell scoped %} 52 |
53 | {{ self.empty_in_prompt() }} 54 |
55 |
56 | {{ cell.source | markdown2html | strip_files_prefix }} 57 |
58 |
59 |
60 | {%- endblock markdowncell %} 61 | 62 | {% block unknowncell scoped %} 63 | unknown type {{ cell.type }} 64 | {% endblock unknowncell %} 65 | 66 | {% block execute_result -%} 67 | {%- set extra_class="output_execute_result" -%} 68 | {% block data_priority scoped %} 69 | {{ super() }} 70 | {% endblock %} 71 | {%- set extra_class="" -%} 72 | {%- endblock execute_result %} 73 | 74 | {% block stream_stdout -%} 75 |
76 |
 77 | {{- output.text | ansi2html -}}
 78 | 
79 |
80 | {%- endblock stream_stdout %} 81 | 82 | {% block stream_stderr -%} 83 |
84 |
 85 | {{- output.text | ansi2html -}}
 86 | 
87 |
88 | {%- endblock stream_stderr %} 89 | 90 | {% block data_svg scoped -%} 91 |
92 | {%- if output.svg_filename %} 93 | 98 | {%- endblock data_svg %} 99 | 100 | {% block data_html scoped -%} 101 |
102 | {{ output.data['text/html'] }} 103 |
104 | {%- endblock data_html %} 105 | 106 | {% block data_markdown scoped -%} 107 |
108 | {{ output.data['text/markdown'] | markdown2html }} 109 |
110 | {%- endblock data_markdown %} 111 | 112 | {% block data_png scoped %} 113 |
114 | {%- if 'image/png' in output.metadata.get('filenames', {}) %} 115 | 131 |
132 | {%- endblock data_png %} 133 | 134 | {% block data_jpg scoped %} 135 |
136 | {%- if 'image/jpeg' in output.metadata.get('filenames', {}) %} 137 | 153 |
154 | {%- endblock data_jpg %} 155 | 156 | {% block data_latex scoped %} 157 |
158 | {{ output.data['text/latex'] }} 159 |
160 | {%- endblock data_latex %} 161 | 162 | {% block error -%} 163 |
164 |
165 | {{- super() -}}
166 | 
167 |
168 | {%- endblock error %} 169 | 170 | {%- block traceback_line %} 171 | {{ line | ansi2html }} 172 | {%- endblock traceback_line %} 173 | 174 | {%- block data_text scoped %} 175 |
176 |
177 | {{- output.data['text/plain'] | ansi2html -}}
178 | 
179 |
180 | {%- endblock -%} 181 | 182 | {%- block data_javascript scoped %} 183 | {% set div_id = uuid4() %} 184 |
185 |
186 | 190 |
191 | {%- endblock -%} 192 | -------------------------------------------------------------------------------- /images/renderer/full.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'basic.tpl' -%} 2 | {% from 'mathjax.tpl' import mathjax %} 3 | 4 | 5 | {%- block header -%} 6 | 7 | 8 | 9 | {%- block html_head -%} 10 | 11 | {{resources['metadata']['name']}} 12 | 13 | 14 | 15 | 16 | {% for css in resources.inlining.css -%} 17 | 20 | {% endfor %} 21 | 22 | 53 | 54 | 55 | 56 | 57 | 58 | {{ mathjax('https://tools-static.wmflabs.org/cdnjs/ajax/libs/mathjax/2.6.1/MathJax.js?config=TeX-AMS_HTML') }} 59 | {%- endblock html_head -%} 60 | 61 | {%- endblock header -%} 62 | 63 | {% block body %} 64 | 65 |
66 |
67 | {{ super() }} 68 |
69 |
def get_extension(path, format):
    """
    Return the extension of path (without the leading dot), or "".

    Uses os.path.splitext rather than splitting the whole path on ".",
    which wrongly returned e.g. "b/c" for "a.b/c" (a dot in a directory
    name) and treated dotfiles like ".bashrc" as having an extension.

    The ``format`` parameter is unused but kept so the signature matches
    how application() calls it.
    """
    ext = os.path.splitext(path)[1]
    # splitext keeps the leading dot; drop it ("" stays "")
    return ext[1:] if ext else ""


def render_ipynb(full_path, format):
    """
    Render a given ipynb file as a standalone HTML page via nbconvert.
    """
    exporter = HTMLExporter()
    with open(full_path, encoding="utf-8") as file_handle:
        html, _ = exporter.from_file(file_handle)
    return Response(html, mimetype="text/html")


def render_md(full_path, format):
    """
    Render a given markdown file as HTML.
    """
    with open(full_path, encoding="utf-8") as file_handle:
        text = file_handle.read()
    html = markdown.markdown(text)
    return Response(html, mimetype="text/html")


def render_rst(full_path, format):
    """
    Render a given reStructuredText file as HTML.
    """
    with open(full_path, encoding="utf-8") as file_handle:
        text = file_handle.read()
    html = docutils.core.publish_string(source=text, writer_name="html")
    return Response(html, mimetype="text/html")


# Map of extensions to functions to call for handling them
handlers = {
    "rst": render_rst,
    "md": render_md,
    "ipynb": render_ipynb,
}
@Request.application
def application(request):
    """
    WSGI entry point: map the request path to a file under BASE_PATH and
    render it with the handler matching its extension (404 if missing,
    400 if no handler, 403 on a suspicious path).
    """
    # str.lstrip() strips a *character set*, not a prefix: the old
    # request.path.lstrip(URL_PREFIX) could eat leading characters of
    # the file name itself.  Remove the prefix explicitly instead.
    file_path = request.path
    if file_path.startswith(URL_PREFIX):
        file_path = file_path[len(URL_PREFIX):]
    # A leading "/" would make os.path.join discard BASE_PATH entirely.
    file_path = file_path.lstrip("/")

    # Protect against path traversal attacks, if they make it this far.
    # normpath collapses ".." components; without it, a path such as
    # BASE_PATH + "/../etc" still startswith(BASE_PATH) and would pass.
    base = os.path.normpath(BASE_PATH)
    full_path = os.path.normpath(os.path.join(base, file_path))
    if full_path != base and not full_path.startswith(base + os.sep):
        # DANGER!
        return Response("Suspicious url", status=403)

    format = request.args.get("format", None)
    if format == "raw":
        # Let nginx serve raw files
        accel_path = os.path.join("/accelredir/", file_path)
        return Response("", headers={"X-Accel-Redirect": accel_path})

    try:
        extension = get_extension(full_path, format)
        if extension and extension in handlers:
            return handlers[extension](full_path, format)
        else:
            return Response("No handlers for format %s" % extension, status=400)
    except FileNotFoundError:
        return Response("Not found", status=404)
    # NOTE: the old trailing "return Response(full_path)" was unreachable
    # (every branch above returns) and has been removed.


if __name__ == "__main__":
    from werkzeug.serving import run_simple

    run_simple("localhost", 4000, application)
update && \ 24 | apt-get install --yes \ 25 | python3-venv \ 26 | pip \ 27 | python3 28 | 29 | ENV LC_ALL en_US.UTF-8 30 | ENV LANG en_US.UTF-8 31 | ENV LANGUAGE en_US.UTF-8 32 | 33 | # Create venv directory, and let users install into it 34 | ENV VENV_DIR /srv/paws 35 | RUN install -d -o ${NB_USER} -g ${NB_USER} ${VENV_DIR} 36 | 37 | ENV PATH=/srv/paws/pwb:/srv/paws/bin:/srv/paws:$PATH 38 | 39 | USER ${NB_USER} 40 | RUN python3 -m venv /srv/paws 41 | RUN pip --no-cache-dir install -U pip setuptools wheel 42 | 43 | # Install base notebook packages 44 | RUN pip install --prefix=/srv/paws --no-cache-dir \ 45 | jupyterhub==4.1.1 \ 46 | jupyterlab==4.4.0 47 | 48 | ## End minimal setup ## 49 | 50 | USER root 51 | 52 | # Base building utilities that'll always be required, probably 53 | RUN apt-get update && \ 54 | apt-get install --yes \ 55 | git \ 56 | locales \ 57 | pkg-config \ 58 | build-essential \ 59 | gcc \ 60 | apt-transport-https 61 | 62 | RUN apt-get update --yes && \ 63 | apt-get install --yes \ 64 | python3-dev \ 65 | openjdk-11-jdk 66 | 67 | # Utilities 68 | RUN apt-get install --yes \ 69 | curl \ 70 | wget \ 71 | less \ 72 | dnsutils \ 73 | emacs \ 74 | links \ 75 | nano \ 76 | vim \ 77 | lsof \ 78 | mariadb-client \ 79 | unrar 80 | 81 | RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - 82 | RUN apt-get install -y nodejs 83 | 84 | # pyaudio 85 | RUN apt-get install --yes \ 86 | portaudio19-dev 87 | 88 | RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ 89 | locale-gen 90 | 91 | ## Install R ## 92 | # Use newer version of R 93 | # Binary packages from packagemanager.rstudio.com work against this. 94 | # Base R from Focal is only 3.6. 
95 | ADD r/cran.gpg /etc/apt/trusted.gpg.d/cran.gpg 96 | ADD r/cran.list /etc/apt/sources.list.d/cran.list 97 | 98 | # Install languages needed and their core dev packages 99 | RUN apt-get update --yes && \ 100 | apt-get install --yes \ 101 | r-recommended \ 102 | r-base-dev \ 103 | r-cran-littler \ 104 | git \ 105 | curl \ 106 | gdebi \ 107 | # For R's mysql 108 | libmariadb-dev \ 109 | # For R's curl 110 | libcurl4-openssl-dev \ 111 | # for ipython kernels 112 | libzmq3-dev \ 113 | # For R's devtools 114 | libssl-dev 115 | 116 | # Install rstudio-server 117 | ENV RSTUDIO_SERVER_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.12.0-467-amd64.deb 118 | RUN curl --silent --location --fail ${RSTUDIO_SERVER_URL} > /tmp/rstudio-server.deb 119 | RUN gdebi -n /tmp/rstudio-server.deb && rm /tmp/rstudio-server.deb 120 | 121 | 122 | # Create user owned R libs dir 123 | # This lets users temporarily install packages 124 | ENV R_LIBS_USER /srv/r 125 | RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} 126 | 127 | # R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. 128 | # We uncomment the default, and set what we wanna - so it picks up 129 | # the packages we install. Without this, RStudio doesn't see the packages 130 | # that R does. 
131 | # Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py 132 | RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ 133 | echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron 134 | 135 | USER ${NB_USER} 136 | RUN pip install --no-cache-dir \ 137 | jupyter-server-proxy \ 138 | jupyter-rsession-proxy 139 | 140 | # Set CRAN mirror to rspm before we install anything 141 | COPY r/Rprofile.site /usr/lib/R/etc/Rprofile.site 142 | # RStudio needs its own config 143 | COPY r/rsession.conf /etc/rstudio/rsession.conf 144 | 145 | USER root 146 | # Install the R Kernel 147 | RUN r -e "install.packages('IRkernel', version='1.3.2')" && \ 148 | r -e "IRkernel::installspec(prefix='${VENV_DIR}')" && \ 149 | rm -rf /tmp/downloaded_packages 150 | 151 | ## Done installing R 152 | 153 | USER root 154 | 155 | ## Setup OpenRefine 156 | ENV OPENREFINE_DIR /srv/openrefine 157 | ENV PATH=$PATH:$OPENREFINE_DIR 158 | RUN mkdir -p ${OPENREFINE_DIR} && cd ${OPENREFINE_DIR} && \ 159 | curl -L 'https://github.com/OpenRefine/OpenRefine/releases/download/3.9.0/openrefine-linux-3.9.0.tar.gz' | tar xzf - --strip=1 160 | 161 | USER root 162 | RUN apt-get install --yes unzip 163 | 164 | USER ${NB_USER} 165 | ENV REFINE_DIR /home/paws 166 | RUN pip install --no-cache-dir nb_serverproxy_openrefine 167 | 168 | USER root 169 | RUN wget https://github.com/OpenRefine/CommonsExtension/releases/download/v0.1.3/openrefine-commons-extension-0.1.3.zip -O /tmp/openrefine-commonsextension.zip 170 | RUN unzip /tmp/openrefine-commonsextension.zip -d ${OPENREFINE_DIR}/webapp/extensions/ 171 | 172 | ## Done setting up OpenRefine 173 | 174 | ## install quarto 175 | 176 | USER root 177 | RUN wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.4.554/quarto-1.4.554-linux-amd64.deb -O /tmp/quarto.deb 178 | RUN dpkg -i /tmp/quarto.deb 179 | 180 | ## done install quarto 181 | 182 | USER root 183 | 184 | RUN npm install -g 
wikibase-cli 185 | 186 | # Machine-learning type stuff 187 | RUN apt-get update && \ 188 | apt-get install --yes \ 189 | # For scipy & friends 190 | libblas-dev \ 191 | liblapack-dev \ 192 | libquadmath0 \ 193 | gfortran \ 194 | # for lxml 195 | libxml2-dev \ 196 | libxslt1-dev \ 197 | # for matplotlib 198 | libfreetype6-dev \ 199 | libpng-dev \ 200 | # for ipython kernels 201 | libzmq3-dev \ 202 | libreadline-dev \ 203 | # For PDFs and stuff 204 | pandoc \ 205 | texlive-xetex 206 | 207 | 208 | ## Install Julia 209 | # Install Julia directories and depot path 210 | 211 | ENV PATH=$PATH:/srv/julia/bin 212 | ENV JULIA_DEPOT_PATH /srv/julia-depot/ 213 | RUN install -d -o ${NB_USER} -g ${NB_USER} /srv/julia 214 | RUN install -d -o ${NB_USER} -g ${NB_USER} ${JULIA_DEPOT_PATH} 215 | 216 | USER ${NB_USER} 217 | 218 | # install julia and julia kernel 219 | COPY install-julia /tmp/install-julia 220 | RUN /tmp/install-julia 221 | 222 | ## Done Installing Julia 223 | 224 | # install sql access tool 225 | COPY install-sql-tool /tmp/install-sql-tool 226 | RUN /tmp/install-sql-tool 227 | 228 | ## Done installing sql access tool 229 | 230 | # Install the bash kernel 231 | RUN pip install bash_kernel 232 | RUN python -m bash_kernel.install --sys-prefix 233 | 234 | # Install mass amount of python libraries! 
235 | COPY --chown=tools.paws:tools.paws requirements.txt /tmp/requirements.txt 236 | 237 | RUN pip --no-cache-dir install -r /tmp/requirements.txt 238 | 239 | # Install pywikibot 240 | RUN git clone \ 241 | --branch $PYWIKIBOT_VERSION \ 242 | --depth 1 \ 243 | --recurse-submodules \ 244 | --shallow-submodules \ 245 | https://gerrit.wikimedia.org/r/pywikibot/core.git \ 246 | /srv/paws/pwb 247 | RUN pip install --no-cache-dir \ 248 | /srv/paws/pwb[eventstreams,graphviz,google,isbn,memento,mysql,mwoauth,html] 249 | COPY --chown=tools.paws:tools.paws user-config.py /srv/paws/ 250 | COPY --chown=tools.paws:tools.paws user-fixes.py /srv/paws/ 251 | 252 | 253 | COPY install-extensions /usr/local/bin/ 254 | RUN /usr/local/bin/install-extensions 255 | 256 | COPY banner /etc/bash.bashrc 257 | 258 | # use custom css to hide clusters tab 259 | COPY --chown=tools.paws:tools.paws hide_clusters_tab.css /home/paws/.jupyter/custom/custom.css 260 | 261 | EXPOSE 8888 262 | -------------------------------------------------------------------------------- /images/singleuser/banner: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Make history more useful and permanent! 4 | # Stolen from http://mywiki.wooledge.org/BashFAQ/088 5 | shopt -s histappend 6 | HISTFILESIZE=400000000 7 | HISTSIZE=10000 8 | PROMPT_COMMAND="history -a" 9 | 10 | # PS1 11 | PS1='${JPY_USER}@PAWS:\w$ ' 12 | 13 | echo "Welcome to PAWS!" 14 | echo "Please behave responsibly" 15 | echo "Getting Started: https://www.mediawiki.org/wiki/Manual:Pywikibot/PAWS" 16 | echo "Questions? Need help? Find us on #wikimedia-cloud on IRC on libera.chat!" 
17 | echo "File bugs at https://phabricator.wikimedia.org/maniphest/task/create/?projects=PAWS" 18 | -------------------------------------------------------------------------------- /images/singleuser/hide_clusters_tab.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Placeholder for custom user CSS 3 | * mainly to be overriden in profile/static/custom/custom.css 4 | * This will always be an empty file in IPython 5 | */ 6 | 7 | /* Hide clusters tab */ 8 | .nav > li > .clusters_tab_link { 9 | display: none; 10 | } 11 | -------------------------------------------------------------------------------- /images/singleuser/install-extensions: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -euo pipefail 3 | # we're getting closer to the end of life for setup.py pipefail now fails on it. 4 | # To keep labpawspublic working we should update to use wheel 5 | 6 | pip install --no-cache-dir \ 7 | pythreejs \ 8 | ipyleaflet \ 9 | bqplot \ 10 | RISE \ 11 | py-heat-magic \ 12 | jupyter-resource-usage \ 13 | git+https://github.com/toolforge/labpawspublic@10ba684789ff5b6e0e2e2122ce68be101266acb8 \ 14 | # git+https://github.com/toolforge/paws-favicon@v1.0 \ 15 | 16 | # Rebuild JupyterLab for plotly-dash extension 17 | # Should be fixed by https://github.com/plotly/jupyter-dash/issues/49 18 | jupyter lab build 19 | 20 | pip install --no-cache-dir git+https://github.com/toolforge/ipynb-paws@147bbeb4dda35e 21 | -------------------------------------------------------------------------------- /images/singleuser/install-julia: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # from: https://github.com/berkeley-dsep-infra/datahub/blob/staging/deployments/julia/image/install-julia.bash 4 | set -euo pipefail 5 | 6 | export JULIA_PATH=/srv/julia/ 7 | export JUPYTER_DATA_DIR=${VENV_DIR}/share/jupyter/ 8 | 9 | curl --silent 
--location --fail https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-1.10.2-linux-x86_64.tar.gz | tar xvz -C ${JULIA_PATH} --strip-components=1 10 | julia -e 'using Pkg; Pkg.add("IJulia"); Pkg.build("IJulia"); using IJulia; installkernel("Julia");' 11 | -------------------------------------------------------------------------------- /images/singleuser/install-sql-tool: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl https://gerrit.wikimedia.org/r/plugins/gitiles/labs/toollabs/+/refs/heads/master/misctools/sql?format=TEXT | base64 -d > /srv/paws/bin/sql 4 | chmod 755 /srv/paws/bin/sql 5 | -------------------------------------------------------------------------------- /images/singleuser/r/Rprofile.site: -------------------------------------------------------------------------------- 1 | # Use RStudio's CRAN mirror to get binary packages 2 | # 'latest' just means it has all available versions. 3 | # We can specify version numbers in devtools::install_version 4 | options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/noble/latest")) 5 | 6 | # RStudio's CRAN mirror needs this to figure out which binary package to serve. 7 | # If not set properly, it will just serve up source packages 8 | # Quite hilarious, IMO. 
9 | # See https://docs.rstudio.com/rspm/admin/binaries.html 10 | options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os))) 11 | -------------------------------------------------------------------------------- /images/singleuser/r/cran.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/images/singleuser/r/cran.gpg -------------------------------------------------------------------------------- /images/singleuser/r/cran.list: -------------------------------------------------------------------------------- 1 | deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/cran.gpg] https://cloud.r-project.org/bin/linux/ubuntu noble-cran40/ 2 | -------------------------------------------------------------------------------- /images/singleuser/r/rsession.conf: -------------------------------------------------------------------------------- 1 | # Use binary packages! 2 | r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/noble/latest 3 | -------------------------------------------------------------------------------- /images/singleuser/requirements.txt: -------------------------------------------------------------------------------- 1 | # Mediawiki related stuff 2 | mwapi 3 | mwdb 4 | mwxml 5 | mwreverts 6 | mwsessions 7 | mwdiffs 8 | mwoauth 9 | mwtypes 10 | mwpersistence 11 | mwparserfromhell 12 | git+https://github.com/yuvipanda/python-wdqs.git 13 | 14 | # visualization libraries 15 | ipywidgets 16 | bokeh 17 | matplotlib 18 | seaborn 19 | ipyleaflet 20 | pythreejs 21 | bqplot 22 | plotly 23 | holoviews[recommended] 24 | streamlit 25 | 26 | # scientific stuff 27 | pandas 28 | scipy 29 | scikit-learn 30 | 31 | # Scraping 32 | lxml 33 | beautifulsoup4 34 | 35 | # SQL! 
36 | pymysql 37 | mycli 38 | wmpaws 39 | 40 | # Web dev stuff 41 | flask 42 | fastapi 43 | 44 | # mwpersistence has a dep which pulls in PyYAML 5.4.1, which has 45 | # packaging issues with Cython 3.0.0 (also present in PyYAML 6.0.0). 46 | # Pin a higher version. 47 | pyyaml>6.0.0 48 | -------------------------------------------------------------------------------- /images/singleuser/user-config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | mylang = "test" 4 | family = "wikipedia" 5 | 6 | custom_path = os.path.expanduser("~/user-config.py") 7 | if os.path.exists(custom_path): 8 | with open(custom_path, "rb") as f: 9 | exec(compile(f.read(), custom_path, "exec"), globals()) 10 | 11 | del f 12 | # Clean up temp variables, since pwb issues a warning otherwise 13 | # to help people catch misspelt config 14 | del custom_path 15 | 16 | # Things that should be non-easily-overridable 17 | for fam in ( 18 | "wikipedia", 19 | "commons", 20 | "meta", 21 | "wikibooks", 22 | "wikimedia", 23 | "wikiquote", 24 | "wikisource", 25 | "wikisource", 26 | "wiktionary", 27 | "wikiversity", 28 | "wikidata", 29 | "mediawiki", 30 | "wikivoyage", 31 | "wikinews", 32 | "species", 33 | "wikifunctions", 34 | "wikitech", 35 | ): 36 | usernames[fam]["*"] = os.environ["USER"] # noqa: F821 37 | 38 | del fam 39 | 40 | # If OAuth integration is available, take it 41 | if "CLIENT_ID" in os.environ: 42 | authenticate["*"] = ( # noqa: F821 43 | os.environ["CLIENT_ID"], 44 | os.environ["CLIENT_SECRET"], 45 | os.environ["ACCESS_KEY"], 46 | os.environ["ACCESS_SECRET"], 47 | ) 48 | -------------------------------------------------------------------------------- /images/singleuser/user-fixes.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | custom_path = os.path.expanduser("~/user-fixes.py") 4 | if os.path.exists(custom_path): 5 | with open(custom_path, "rb") as f: 6 | exec(compile(f.read(), 
custom_path, "exec"), globals()) 7 | 8 | del f 9 | 10 | # Clean up temp variables, since pwb issues a warning otherwise 11 | # to help people catch misspelt config 12 | del custom_path 13 | -------------------------------------------------------------------------------- /paws/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /paws/Chart.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: v2 4 | description: A Helm chart for PAWS 5 | name: paws 6 | version: 3.0.0 7 | dependencies: 8 | - name: jupyterhub 9 | version: 3.3.8 10 | repository: "https://jupyterhub.github.io/helm-chart" 11 | -------------------------------------------------------------------------------- /paws/codfw-secrets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/codfw-secrets.yaml -------------------------------------------------------------------------------- /paws/codfw.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | mysql: 4 | domain: analytics.db.svc.eqiad.wmflabs 5 | # TODO: remove this when the multiinstance replica proxy is removed 6 | host: enwiki.analytics.db.svc.eqiad.wmflabs 7 | jupyterhub: 8 | hub: 9 | extraVolumes: 10 | - name: homes 11 | nfs: 12 | server: 
pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 13 | path: /srv/paws/project 14 | - name: dumps 15 | nfs: 16 | server: pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 17 | path: / 18 | # Without this, dumps becomes inaccessible and can hang the host 19 | - name: dumps-src1 20 | nfs: 21 | server: pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 22 | path: / 23 | - name: dumps-src2 24 | nfs: 25 | server: pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 26 | path: / 27 | extraConfig: 28 | 00-myConfig: | 29 | localdev = False 30 | nfs_home = 'pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud' 31 | dumps_src1 = 'pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud' 32 | dumps_src2 = 'pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud' 33 | ingress: 34 | enabled: true 35 | hosts: 36 | - hub-paws-dev.codfw1dev.wmcloud.org 37 | singleuser: 38 | extraEnv: 39 | HUB_DOMAIN: "hub-paws-dev.codfw1dev.wmcloud.org" # Check jupyterhub.ingress.hosts 40 | minesweeper: 41 | enabled: true 42 | localdev: 43 | enabled: false 44 | pawspublic: 45 | nbserve: 46 | requests: 47 | memory: "200Mi" 48 | ingress: 49 | host: public-paws-dev.codfw1dev.wmcloud.org 50 | renderer: 51 | requests: 52 | cpu: "50m" 53 | memory: "10Mi" 54 | -------------------------------------------------------------------------------- /paws/files/minesweeper/minesweeper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | minesweeper script 4 | 5 | Continuous process, on each node via DaemonSet, 6 | to identify processes that could be considered for termination: 7 | 8 | - determine which processes are "suspicious" (see herorat.py) 9 | - produce report on suspicious pods: 10 | - show running processes (`ps aux`) 11 | - tail pod logs 12 | - automatically terminate pods likely to be abuse, etc. 
13 | 14 | Downloaded originally from https://raw.githubusercontent.com/jupyterhub/mybinder.org-deploy/6364f6e2ad3fefd8fa00937749f456528489ec76/mybinder/files/minesweeper/minesweeper.py 15 | 16 | """ 17 | 18 | import asyncio 19 | import copy 20 | import glob 21 | import json 22 | import os 23 | import pprint 24 | import re 25 | import socket 26 | import sys 27 | import threading 28 | from concurrent.futures import ThreadPoolExecutor 29 | from functools import partial 30 | from operator import attrgetter 31 | from textwrap import indent 32 | 33 | import kubernetes.client 34 | import kubernetes.config 35 | from kubernetes.stream import stream 36 | 37 | import psutil 38 | 39 | # herorat located in secrets/minesweeper/ 40 | import herorat 41 | from herorat import inspect_pod 42 | from herorat import inspect_process 43 | 44 | 45 | kubernetes.config.load_incluster_config() 46 | kube = kubernetes.client.CoreV1Api() 47 | local = threading.local() 48 | config = {} 49 | hostname = os.environ.get("NODE_NAME", socket.gethostname()) 50 | 51 | default_config = { 52 | "userid": 1000, 53 | "inspect_procs_without_pod": False, 54 | "threads": 8, 55 | "interval": 300, 56 | "namespace": os.environ.get("NAMESPACE", "default"), 57 | "pod_selectors": { 58 | "label_selector": "component=singleuser-server", 59 | "field_selector": f"spec.nodeName={hostname}", 60 | }, 61 | "log_tail_lines": 100, 62 | # process attributes to retrieve 63 | # see psutil.as_dict docs for available fields: 64 | # https://psutil.readthedocs.io/en/latest/#psutil.Process.as_dict 65 | "proc_attrs": [ 66 | "cmdline", 67 | "cpu_percent", 68 | "cpu_times", 69 | "exe", 70 | "memory_info", 71 | "name", 72 | "pid", 73 | "ppid", 74 | "status", 75 | "uids", 76 | ], 77 | } 78 | 79 | default_config.update(herorat.default_config) 80 | 81 | 82 | def get_kube(): 83 | """Get thread-local kubernetes client 84 | 85 | kubernetes client objects aren't threadsafe, I guess 86 | """ 87 | if not hasattr(local, "kube"): 88 | local.kube = 
kubernetes.client.CoreV1Api() 89 | return local.kube 90 | 91 | 92 | class Proc(dict): 93 | """Proc is a dict subclass with attribute-access for keys 94 | 95 | suspicious and should_terminate are added via inspection. 96 | They can be booleans or truthy strings explaining 97 | why they are suspicious or should be terminated. 98 | """ 99 | 100 | def __init__(self, **kwargs): 101 | kwargs.setdefault("suspicious", False) 102 | kwargs.setdefault("should_terminate", False) 103 | super().__init__(**kwargs) 104 | 105 | # secondary derived fields 106 | # cmd is the command-line as a single string 107 | self["cmd"] = " ".join(self["cmdline"]) 108 | # cpu_total is the sum of cpu times (user, system, children, etc.) 109 | self["cpu_total"] = sum(kwargs.get("cpu_times", [])) 110 | 111 | def __repr__(self): 112 | key_fields = ", ".join( 113 | [ 114 | f"{key}={self.get(key)}" 115 | for key in [ 116 | "pid", 117 | "status", 118 | "suspicious", 119 | "should_terminate", 120 | "cmd", 121 | ] 122 | if self.get(key) is not None 123 | ] 124 | ) 125 | return f"{self.__class__.__name__}({key_fields})" 126 | 127 | def __getattr__(self, key): 128 | return self[key] 129 | 130 | def __setattr__(self, key, value): 131 | self[key] = value 132 | 133 | 134 | def get_procs(userid): 135 | """Get all container processes running with a given user id""" 136 | procs = [] 137 | for p in psutil.process_iter(attrs=config["proc_attrs"]): 138 | # TODO: should we filter to userid? 139 | # For now: skip userid filtering, because we 140 | # want to catch all processes in pods, even if they 141 | # ran setuid 142 | # if p.info["uids"].real != userid: 143 | # continue 144 | if not p.info["cmdline"]: 145 | # ignore empty commands, e.g. 
kernel processes 146 | continue 147 | 148 | proc = Proc(**p.info) 149 | procs.append(proc) 150 | 151 | procs = sorted(procs, key=attrgetter("cpu_percent"), reverse=True) 152 | return procs 153 | 154 | 155 | def get_pods(): 156 | """Get all the pods in our namespace""" 157 | kube = get_kube() 158 | namespace = config["namespace"] 159 | # _preload_content=False doesn't even json-parse list results?? 160 | resp = kube.list_namespaced_pod( 161 | namespace, 162 | _preload_content=False, 163 | **config["pod_selectors"], 164 | ) 165 | return json.loads(resp.read().decode("utf8"))["items"] 166 | 167 | 168 | def pods_by_uid(pods): 169 | """Construct a dict of pods, keyed by pod uid""" 170 | return {pod["metadata"]["uid"]: pod for pod in pods} 171 | 172 | 173 | def get_all_pod_uids(): 174 | """Return mapping of pid to pod uid""" 175 | 176 | pod_uids = {} 177 | for cgroup_file in glob.glob("/proc/[0-9]*/cgroup"): 178 | pid = int(cgroup_file.split("/")[-2]) 179 | 180 | try: 181 | with open(cgroup_file) as f: 182 | cgroups = f.read() 183 | 184 | except FileNotFoundError: 185 | # process deleted, ignore 186 | continue 187 | 188 | m = re.search("/pod([^/]+)", cgroups) 189 | if m is None: 190 | # not a pod proc 191 | continue 192 | pod_uids[pid] = m.group(1) 193 | return pod_uids 194 | 195 | 196 | def associate_pods_procs(pods, procs): 197 | """Associate pods and processes 198 | For all pods, defines pod["minesweeper"]["procs"] = list_of_procs_in_pod 199 | 200 | Returns (pods, procs_without_pods) 201 | """ 202 | for pod in pods.values(): 203 | pod["minesweeper"] = { 204 | "procs": [], 205 | } 206 | procs_without_pods = [] 207 | pod_uids = get_all_pod_uids() 208 | for proc in procs: 209 | pod_uid = pod_uids.get(proc.pid) 210 | pod = pods.get(pod_uid) 211 | if not pod: 212 | procs_without_pods.append(proc) 213 | else: 214 | pod["minesweeper"]["procs"].append(proc) 215 | 216 | return pods, procs_without_pods 217 | 218 | 219 | def ps_pod(pod, userid=1000): 220 | """Get ps output from 
a single pod""" 221 | kube = get_kube() 222 | try: 223 | client = stream( 224 | kube.connect_get_namespaced_pod_exec, 225 | pod["metadata"]["name"], 226 | namespace=pod["metadata"]["namespace"], 227 | command=["ps", "aux"], 228 | stderr=True, 229 | stdin=False, 230 | stdout=True, 231 | _preload_content=False, 232 | ) 233 | client.run_forever(timeout=60) 234 | stderr = client.read_stderr() 235 | if stderr.strip(): 236 | print(f"err! {stderr}", file=sys.stderr) 237 | stdout = client.read_stdout() 238 | 239 | returncode = client.returncode 240 | if returncode: 241 | raise RuntimeError(f"stdout={stdout}\nstderr={stderr}") 242 | return stdout 243 | except Exception as e: 244 | return f"Error reporting on ps in {pod['metadata']['name']}: {e}" 245 | 246 | 247 | def log_pod(pod): 248 | """Return the logs for a suspicious pod""" 249 | kube = get_kube() 250 | try: 251 | return kube.read_namespaced_pod_log( 252 | pod["metadata"]["name"], 253 | namespace=pod["metadata"]["namespace"], 254 | tail_lines=config["log_tail_lines"], 255 | ) 256 | except Exception as e: 257 | return f"Error collecting logs for {pod['metadata']['name']}: {e}" 258 | 259 | 260 | async def report_pod(pod): 261 | """Produce a report on a single pod""" 262 | pod_name = pod["metadata"]["name"] 263 | ps_future = in_pool(lambda: ps_pod(pod)) 264 | logs_future = in_pool(lambda: log_pod(pod)) 265 | ps, logs = await asyncio.gather(ps_future, logs_future) 266 | print( 267 | "\n".join( 268 | [ 269 | pod_name, 270 | f"ps {pod_name}:", 271 | indent(ps, " "), 272 | f"logs {pod_name}:", 273 | indent(logs, " "), 274 | ] 275 | ) 276 | ) 277 | 278 | 279 | def terminate_pod(pod): 280 | """Call in a thread to terminate a pod""" 281 | namespace = pod["metadata"]["namespace"] 282 | name = pod["metadata"]["name"] 283 | print(f"Deleting pod {name}") 284 | kube = get_kube() 285 | kube.delete_namespaced_pod(name=name, namespace=namespace) 286 | 287 | 288 | async def node_report(pods=None, userid=1000): 289 | """Print a report of 
suspicious processes on a single node""" 290 | if pods is None: 291 | pods = pods_by_uid(await in_pool(get_pods)) 292 | procs = await in_pool(lambda: get_procs(userid)) 293 | print(f"Total processes for {hostname}: {len(procs)}\n", end="") 294 | pods, procs_without_pod = associate_pods_procs(pods, procs) 295 | 296 | # inspect all procs in our pods 297 | user_procs = [] 298 | for pod in pods.values(): 299 | user_procs.extend(pod["minesweeper"]["procs"]) 300 | pod["minesweeper"]["procs"] = [ 301 | inspect_process(p) for p in pod["minesweeper"]["procs"] 302 | ] 303 | print(f"Total user pods for {hostname}: {len(pods)}\n", end="") 304 | print(f"Total user processes for {hostname}: {len(user_procs)}\n", end="") 305 | suspicious_pods = [pod for pod in pods.values() if inspect_pod(pod)["suspicious"]] 306 | 307 | print(f"Pods of interest for {hostname}: {len(suspicious_pods)}") 308 | 309 | # report on all suspicious pods 310 | report_futures = [] 311 | for pod in suspicious_pods: 312 | fut = asyncio.ensure_future(report_pod(pod)) 313 | report_futures.append(fut) 314 | await asyncio.sleep(0) 315 | 316 | # report on suspicious processes with no matching pod 317 | suspicious_procs_without_pod = [] 318 | if config["inspect_procs_without_pod"]: 319 | procs_without_pod = [inspect_process(p) for p in procs_without_pod] 320 | suspicious_procs_without_pod = [p for p in procs_without_pod if p.suspicious] 321 | 322 | if suspicious_procs_without_pod: 323 | print( 324 | f"No pods found for {len(suspicious_procs_without_pod)} suspicious processes on {hostname}:" 325 | ) 326 | for proc in suspicious_procs_without_pod: 327 | print(f" {proc.pid}: {proc.cmd}") 328 | 329 | if report_futures: 330 | await asyncio.gather(*report_futures) 331 | 332 | # finally, terminate pods that meet the immediate termination condition 333 | pods_to_terminate = [ 334 | pod for pod in suspicious_pods if pod["minesweeper"]["should_terminate"] 335 | ] 336 | if pods_to_terminate: 337 | terminate_futures = [ 338 | 
in_pool(partial(terminate_pod, pod)) for pod in pods_to_terminate 339 | ] 340 | await asyncio.gather(*terminate_futures) 341 | 342 | 343 | def get_pool(n=None): 344 | """Get the global thread pool executor""" 345 | if get_pool._pool is None: 346 | get_pool._pool = ThreadPoolExecutor(config["threads"]) 347 | return get_pool._pool 348 | 349 | 350 | get_pool._pool = None 351 | 352 | 353 | def in_pool(func): 354 | f = get_pool().submit(func) 355 | return asyncio.wrap_future(f) 356 | 357 | 358 | def load_config(): 359 | """load config from mounted config map 360 | 361 | may change during run, so reload from file each time 362 | """ 363 | global config 364 | prior_config = copy.deepcopy(config) 365 | config.update(default_config) 366 | config_file = "/etc/minesweeper/minesweeper.json" 367 | if os.path.isfile(config_file): 368 | with open(config_file) as f: 369 | file_config = json.load(f) 370 | config.update(file_config) 371 | # sync global config with herorat 372 | herorat.config = config 373 | else: 374 | print(f"No such file: {config_file}") 375 | 376 | if config != prior_config: 377 | print("Loaded config:") 378 | pprint.pprint(config) 379 | 380 | return config 381 | 382 | 383 | async def main(): 384 | """Main entrypoint: run node_report periodically forever""" 385 | while True: 386 | # reload since configmap can change 387 | load_config() 388 | await node_report(userid=config["userid"]) 389 | await asyncio.sleep(config["interval"]) 390 | 391 | 392 | if __name__ == "__main__": 393 | asyncio.run(main()) 394 | -------------------------------------------------------------------------------- /paws/files/minesweeper/secrets/ban.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/files/minesweeper/secrets/ban.py -------------------------------------------------------------------------------- /paws/files/minesweeper/secrets/herorat.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/files/minesweeper/secrets/herorat.py -------------------------------------------------------------------------------- /paws/files/minesweeper/secrets/minesweeper.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/files/minesweeper/secrets/minesweeper.yaml -------------------------------------------------------------------------------- /paws/production.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | mysql: 4 | domain: analytics.db.svc.eqiad.wmflabs 5 | # TODO: remove this when the multiinstance replica proxy is removed 6 | host: enwiki.analytics.db.svc.eqiad.wmflabs 7 | jupyterhub: 8 | hub: 9 | extraVolumes: 10 | - name: homes 11 | nfs: 12 | server: paws-nfs.svc.paws.eqiad1.wikimedia.cloud 13 | path: /srv/paws/project 14 | - name: dumps 15 | nfs: 16 | server: clouddumps1002.wikimedia.org 17 | path: / 18 | # Without this, dumps becomes inaccessible and can hang the host 19 | - name: dumps-src1 20 | nfs: 21 | server: clouddumps1001.wikimedia.org 22 | path: / 23 | - name: dumps-src2 24 | nfs: 25 | server: clouddumps1002.wikimedia.org 26 | path: / 27 | extraConfig: 28 | 00-myConfig: | 29 | localdev = False 30 | nfs_home = 'paws-nfs.svc.paws.eqiad1.wikimedia.cloud' 31 | dumps_src1 = 'clouddumps1001.wikimedia.org' 32 | dumps_src2 = 'clouddumps1002.wikimedia.org' 33 | ingress: 34 | enabled: true 35 | hosts: 36 | - hub-paws.wmcloud.org 37 | singleuser: 38 | extraEnv: 39 | HUB_DOMAIN: "hub-paws.wmcloud.org" # Check jupyterhub.ingress.hosts 40 | minesweeper: 41 | enabled: true 42 | localdev: 43 | enabled: false 44 | pawspublic: 45 | nbserve: 46 | requests: 47 | memory: "2000Mi" 48 | ingress: 49 | host: 
public-paws.wmcloud.org 50 | renderer: 51 | requests: 52 | cpu: "500m" 53 | memory: "1000Mi" 54 | -------------------------------------------------------------------------------- /paws/secrets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/secrets.yaml -------------------------------------------------------------------------------- /paws/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Thank you for installing {{ .Chart.Name }}! 2 | 3 | An essential part of setting up your dev environment is adding a hosts file entry. 4 | Get the IP of minikube with: 5 | minikube ip 6 | Add these lines to your hosts file: 7 | {{ index .Values.jupyterhub.ingress.hosts 0 }} 8 | {{ .Values.pawspublic.ingress.host }} 9 | 10 | You can override any values you need in a file called dev-values.yaml. 11 | 12 | If you create one, run: 13 | helm -n {{ .Release.Namespace }} upgrade {{ .Release.Name }} paws/ -f dev-values.yaml 14 | Happy hacking. 
15 | -------------------------------------------------------------------------------- /paws/templates/frontpage.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{ if .Values.paws.frontPageEnabled }} 4 | apiVersion: networking.k8s.io/v1 5 | kind: Ingress 6 | metadata: 7 | labels: 8 | name: paws-front-page 9 | ingress.paws.wmcloud.org: front 10 | annotations: 11 | nginx.ingress.kubernetes.io/permanent-redirect: {{ .Values.paws.ingress.frontRedirect | quote }} 12 | name: paws-front-page 13 | spec: 14 | ingressClassName: nginx 15 | rules: 16 | - host: {{ .Values.paws.ingress.frontHost | quote }} 17 | {{ end }} 18 | -------------------------------------------------------------------------------- /paws/templates/localdev.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.localdev.enabled }} 2 | # We assume this is only enabled in a local dev environment, so... 3 | --- 4 | apiVersion: v1 5 | kind: PersistentVolume 6 | metadata: 7 | name: userhomes 8 | spec: 9 | accessModes: 10 | - ReadWriteMany 11 | capacity: 12 | storage: 1Gi 13 | hostPath: 14 | path: /srv/paws/project/paws/userhomes 15 | --- 16 | apiVersion: v1 17 | kind: PersistentVolume 18 | metadata: 19 | name: dumps1 20 | spec: 21 | accessModes: 22 | - ReadOnlyMany 23 | capacity: 24 | storage: 1Gi 25 | hostPath: 26 | path: /mnt/nfs/dumps-clouddumps1001.wikimedia.org 27 | --- 28 | kind: PersistentVolume 29 | apiVersion: v1 30 | metadata: 31 | name: dumps2 32 | spec: 33 | accessModes: 34 | - ReadOnlyMany 35 | capacity: 36 | storage: 1Gi 37 | hostPath: 38 | path: /mnt/nfs/dumps-clouddumps1002.wikimedia.org 39 | --- 40 | kind: PersistentVolume 41 | apiVersion: v1 42 | metadata: 43 | name: dumps 44 | spec: 45 | accessModes: 46 | - ReadOnlyMany 47 | capacity: 48 | storage: 1Gi 49 | hostPath: 50 | path: /mnt/public/dumps 51 | --- 52 | apiVersion: batch/v1 53 | kind: Job 54 | metadata: 55 | name: 
scary-host-vol-perm-fix 56 | labels: 57 | app.kubernetes.io/managed-by: {{ .Release.Service | quote }} 58 | app.kubernetes.io/instance: {{ .Release.Name | quote }} 59 | annotations: 60 | "helm.sh/hook": post-install 61 | "helm.sh/hook-weight": "-1" 62 | "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded 63 | spec: 64 | template: 65 | metadata: 66 | name: {{ printf "permission-fix-%s" .Release.Name | quote }} 67 | labels: 68 | app.kubernetes.io/managed-by: {{ .Release.Service | quote }} 69 | app.kubernetes.io/instance: {{ .Release.Name | quote }} 70 | helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 71 | spec: 72 | restartPolicy: Never 73 | containers: 74 | - name: fixingperms-host-vols 75 | image: {{ tpl .Values.localdev.image.template . | quote }} 76 | command: 77 | - chown 78 | - -R 79 | - 52771:52771 80 | - /data/project/paws/userhomes 81 | volumeMounts: 82 | - mountPath: /data/project/paws/userhomes 83 | name: security-disaster-only-for-minikube 84 | volumes: 85 | - name: security-disaster-only-for-minikube 86 | hostPath: 87 | path: /srv/paws/project/paws/userhomes 88 | {{ end }} 89 | -------------------------------------------------------------------------------- /paws/templates/minesweeper/configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{- /* configmap for minesweeper source files */}} 4 | kind: ConfigMap 5 | apiVersion: v1 6 | metadata: 7 | name: minesweeper-src 8 | labels: 9 | app: minesweeper 10 | component: minesweeper-src 11 | heritage: {{ .Release.Service | quote }} 12 | release: {{ .Release.Name | quote }} 13 | data: 14 | {{- (.Files.Glob "files/minesweeper/*").AsConfig | nindent 2 }} 15 | {{- (.Files.Glob "files/minesweeper/secrets/*").AsConfig | nindent 2 }} 16 | {{- /* configmap for minesweeper configuration from values */}} 17 | --- 18 | apiVersion: v1 19 | kind: ConfigMap 20 | metadata: 21 | name: minesweeper-config 22 | labels: 23 | app: minesweeper 24 | 
component: minesweeper-config 25 | heritage: {{ .Release.Service }} 26 | release: {{ .Release.Name }} 27 | data: 28 | "minesweeper.json": {{ toJson .Values.minesweeper | quote }} 29 | -------------------------------------------------------------------------------- /paws/templates/minesweeper/daemonset.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{- if .Values.minesweeper.enabled -}} 4 | apiVersion: apps/v1 5 | kind: DaemonSet 6 | metadata: 7 | name: minesweeper 8 | spec: 9 | selector: 10 | matchLabels: 11 | name: minesweeper 12 | release: {{ .Release.Name }} 13 | template: 14 | metadata: 15 | labels: 16 | name: minesweeper 17 | app: binder 18 | component: minesweeper 19 | release: {{ .Release.Name }} 20 | heritage: {{ .Release.Service }} 21 | annotations: 22 | checksum/configmap: {{ include (print $.Template.BasePath "/minesweeper/configmap.yaml") . | sha256sum }} 23 | 24 | spec: 25 | hostPID: true 26 | securityContext: 27 | # run as same uid as user pods to limit privileges 28 | # we may need to run as root in the future 29 | # to access certain fields. 30 | # we could limit further with runAsUser: nobody 31 | # but that takes away some fields we use 32 | runAsUser: {{ .Values.jupyterhub.singleuser.uid }} 33 | # root group gets us read-only access to what we want for now 34 | runAsGroup: 0 35 | tolerations: 36 | - effect: NoSchedule 37 | key: hub.jupyter.org/dedicated 38 | operator: Equal 39 | value: user 40 | - effect: NoSchedule 41 | key: hub.jupyter.org_dedicated 42 | operator: Equal 43 | value: user 44 | serviceAccountName: minesweeper 45 | 46 | {{- with .Values.imagePullSecrets }} 47 | imagePullSecrets: 48 | {{- . | toYaml | nindent 8 }} 49 | {{- end }} 50 | 51 | containers: 52 | - name: minesweeper 53 | image: {{ tpl .Values.minesweeper.image.template . 
| quote}} 54 | securityContext: 55 | # we are running with hostPID, but want extremely limited capabilities 56 | capabilities: 57 | drop: 58 | - all 59 | {{- with .Values.minesweeper.resources }} 60 | resources: {{ toJson . }} 61 | {{- end }} 62 | volumeMounts: 63 | - name: config 64 | mountPath: /etc/minesweeper 65 | readOnly: true 66 | - name: src 67 | mountPath: /srv/minesweeper 68 | readOnly: true 69 | command: 70 | - python3 71 | - /srv/minesweeper/minesweeper.py 72 | env: 73 | - name: NODE_NAME 74 | valueFrom: 75 | fieldRef: 76 | fieldPath: spec.nodeName 77 | - name: NAMESPACE 78 | value: {{ .Release.Namespace }} 79 | terminationGracePeriodSeconds: 0 80 | volumes: 81 | - name: src 82 | configMap: 83 | name: minesweeper-src 84 | - name: config 85 | configMap: 86 | name: minesweeper-config 87 | {{- end }} 88 | -------------------------------------------------------------------------------- /paws/templates/minesweeper/rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: Role 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: minesweeper 6 | rules: 7 | # needs read/exec/logs/delete access 8 | - apiGroups: [""] 9 | resources: ["pods"] 10 | verbs: ["get", "watch", "list", "create", "delete"] 11 | - apiGroups: [""] 12 | resources: ["pods/log"] 13 | verbs: ["get"] 14 | - apiGroups: [""] 15 | resources: ["pods/exec"] 16 | verbs: ["create", "get"] 17 | --- 18 | kind: RoleBinding 19 | apiVersion: rbac.authorization.k8s.io/v1 20 | metadata: 21 | name: minesweeper 22 | subjects: 23 | - kind: ServiceAccount 24 | namespace: {{ .Release.Namespace }} 25 | name: minesweeper 26 | roleRef: 27 | kind: Role 28 | name: minesweeper 29 | apiGroup: rbac.authorization.k8s.io 30 | --- 31 | apiVersion: v1 32 | kind: ServiceAccount 33 | metadata: 34 | name: minesweeper 35 | -------------------------------------------------------------------------------- /paws/templates/nbserve/nginx.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: nbserve-nginx 7 | data: 8 | nginx.conf: | 9 | # Let nginx automatically determine the number of worker processes 10 | # to run. This defaults to number of cores on the host. 11 | worker_processes auto; 12 | 13 | # Do not daemonize - we'll either run this under a supervisor 14 | # ourselves, or jupyterhub will manage the process, restarting 15 | # it when it dies as necessary 16 | daemon off; 17 | 18 | # Set number of connections accepted per worker 19 | events { 20 | worker_connections 768; 21 | } 22 | 23 | # This needs to be in 'main' since otherwise nginx 24 | # will try to write to /var/log/nginx/error.log and fail 25 | # because it does not have permissions 26 | error_log stderr info; 27 | 28 | # We do not really need / care about a pidfile 29 | pid /dev/null; 30 | 31 | http { 32 | sendfile on; 33 | tcp_nopush on; 34 | tcp_nodelay on; 35 | keepalive_timeout 65; 36 | # Some complex notebooks take a long time to render 37 | proxy_read_timeout 180s; 38 | proxy_connect_timeout 180s; 39 | uwsgi_read_timeout 180s; 40 | types_hash_max_size 2048; 41 | # server_tokens off; 42 | 43 | # These are various temp file paths, many that we do not use. 44 | # They are by default set to /var/lib/nginx/*, which causes 45 | # problems when running as non-root, as we are here. So we 46 | # shall set them all to /tmp. FIXME: Find proper paths for 47 | # these somewhere (perhaps on current-dir?) 48 | client_body_temp_path /tmp; 49 | proxy_temp_path /tmp; 50 | fastcgi_temp_path /tmp; 51 | uwsgi_temp_path /tmp; 52 | scgi_temp_path /tmp; 53 | 54 | # access_log does not support 'stderr' directive directly 55 | access_log /dev/stderr; 56 | 57 | # nginx needs an async way to resolve hostnames to IPs, and 58 | # the default `gethostbyname` setup does not allow for this.
59 | # While ideally nginx should parse /etc/resolv.conf itself, 60 | # it does not do so at this time, and needs us to set the DNS 61 | # server explicitly. 62 | # openresty allows a "local=on" which pulls from 63 | # /etc/resolv.conf 64 | 65 | # NOTE: This causes issues when resolving localhost and 66 | # other hostnames traditionally set in /etc/hosts, since 67 | # DNS servers respond erratically to queries for them. 68 | resolver local=on ipv6=off; 69 | 70 | # This is used to support websocket proxying. We need to set 71 | # the 'Upgrade' HTTP header to either 'upgrade' (for websockets) 72 | # or 'close' (for everything else). 73 | # See https://www.nginx.com/resources/admin-guide/reverse-proxy/ 74 | # for more details. 75 | map $http_upgrade $connection_upgrade { 76 | default upgrade; 77 | '' close; 78 | } 79 | 80 | # Shared memory area for caching username to id mappings 81 | lua_shared_dict usernamemapping 16m; 82 | 83 | lua_ssl_trusted_certificate /etc/ssl/certs/ca-certificates.crt; 84 | lua_ssl_verify_depth 10; 85 | 86 | # Serve things with appropriate mimetypes 87 | include /usr/local/openresty/nginx/conf/mime.types; 88 | 89 | # This is the 'regular' server, that sees all public 90 | # traffic and proxies them to the appropriate backend server. 91 | server { 92 | listen 0.0.0.0:8000; 93 | 94 | location ~ \/\. { 95 | deny all; 96 | } 97 | 98 | location = /robots.txt { 99 | alias /var/www/robots.txt; 100 | } 101 | 102 | # No port numbers in redirects 103 | port_in_redirect off; 104 | 105 | location ~ ^/user/([^/]+)/notebooks/(.*)$ { 106 | rewrite /user/([^/]+)/notebooks/(.*)$ /User:$1/$2 permanent; 107 | } 108 | 109 | # Only after the User: redirect! Otherwise our backend can't find the file. 
110 | location ~ ^/\d+/.*\.(rst|md|ipynb)$ { 111 | include /usr/local/openresty/nginx/conf/uwsgi_params; 112 | uwsgi_pass uwsgi://renderer.{{ .Release.Namespace }}.svc.cluster.local:8000; 113 | } 114 | 115 | location / { 116 | index 2987347263023847928376409; 117 | fancyindex on; 118 | 119 | alias /data/project/paws/userhomes/; 120 | } 121 | 122 | 123 | location /accelredir { 124 | internal; 125 | 126 | alias /data/project/paws/userhomes/; 127 | } 128 | 129 | # this section is what allows: 130 | # https://public.hub.paws.local/User:VRook_(WMF)/awesome.ipynb 131 | # to present whatever is at: 132 | # https://public.hub.paws.local/67158682/awesome.ipynb 133 | location /User: { 134 | rewrite_by_lua_block { 135 | local m = ngx.re.match(ngx.var.uri, "/User:([^/]+)(.*)"); 136 | if m then 137 | local userid = ngx.shared.usernamemapping:get(m[1]); 138 | if userid == nil then 139 | local http = require "resty.http"; 140 | local httpc = http.new(); 141 | local apiurl = "https://meta.wikimedia.org/w/api.php?" .. 142 | "action=query&format=json&formatversion=2" .. 143 | "&prop=&list=users&meta=&usprop=centralids" .. 144 | "&ususers=" .. ngx.escape_uri(m[1]); 145 | 146 | local res, err = httpc:request_uri(apiurl); 147 | local cjson = require "cjson"; 148 | local resp_data = cjson.decode(res.body); 149 | 150 | ngx.log(ngx.ERR, res.body); 151 | if resp_data["query"]["users"][1]["missing"] then 152 | ngx.exit(404); 153 | end 154 | userid = resp_data["query"]["users"][1]["centralids"]["CentralAuth"] 155 | 156 | ngx.shared.usernamemapping:set(m[1], userid); 157 | end 158 | ngx.req.set_uri("/" .. userid .. m[2], true, true); 159 | end 160 | } 161 | 162 | proxy_http_version 1.1; 163 | 164 | # This is required for websockets to be proxied correctly 165 | proxy_set_header Upgrade $http_upgrade; 166 | proxy_set_header Connection $connection_upgrade; 167 | 168 | # This is required for the target servers to know what 169 | # exactly the original protocol / URI / Host was. 
170 | proxy_set_header X-Forwarded-Proto $scheme; 171 | proxy_set_header X-Original-URI $request_uri; 172 | proxy_set_header Host $host:$server_port; 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /paws/templates/public.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{ if .Values.pawsPublicEnabled }} 4 | apiVersion: v1 5 | kind: ServiceAccount 6 | metadata: 7 | name: paws-public 8 | --- 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | metadata: 12 | labels: 13 | name: nbserve 14 | name: nbserve 15 | spec: 16 | replicas: {{ .Values.pawspublic.nbserve.replicas }} 17 | selector: 18 | matchLabels: 19 | name: nbserve 20 | template: 21 | metadata: 22 | creationTimestamp: null 23 | labels: 24 | name: nbserve 25 | spec: 26 | serviceAccount: paws-public 27 | containers: 28 | - image: {{ tpl .Values.pawspublic.nbserve.image.template . | quote }} 29 | imagePullPolicy: Always 30 | name: nbserve 31 | ports: 32 | - containerPort: 8000 33 | name: nbserve 34 | protocol: TCP 35 | volumeMounts: 36 | - mountPath: /data/project/paws/userhomes 37 | name: pawshomes 38 | - name: nginx-conf 39 | mountPath: /mnt 40 | resources: 41 | requests: 42 | memory: {{ .Values.pawspublic.nbserve.requests.memory }} 43 | cpu: "50m" 44 | 45 | volumes: 46 | {{ if .Values.localdev.enabled }} 47 | - hostPath: 48 | path: /srv/paws/project/paws/userhomes 49 | {{ else }} 50 | - nfs: 51 | {{- with (index .Values.jupyterhub.hub.extraVolumes 0) }} 52 | server: {{ .nfs.server }} 53 | {{- end }} 54 | path: /srv/paws/project/paws/userhomes 55 | {{ end }} 56 | name: pawshomes 57 | - name: nginx-conf 58 | configMap: 59 | name: nbserve-nginx 60 | 61 | --- 62 | 63 | apiVersion: apps/v1 64 | kind: Deployment 65 | metadata: 66 | labels: 67 | name: renderer 68 | name: renderer 69 | spec: 70 | selector: 71 | matchLabels: 72 | name: renderer 73 | template: 74 | metadata: 75 | labels: 76 | name: renderer 
77 | spec: 78 | serviceAccount: paws-public 79 | containers: 80 | - env: 81 | - name: BASE_PATH 82 | value: /data/project/paws/userhomes/ 83 | - name: URL_PREFIX 84 | value: / 85 | image: {{ tpl .Values.pawspublic.renderer.image.template . | quote }} 86 | imagePullPolicy: Always 87 | name: renderer 88 | ports: 89 | - containerPort: 8000 90 | name: nbserve 91 | protocol: TCP 92 | volumeMounts: 93 | - mountPath: /data/project/paws/userhomes 94 | name: pawshomes 95 | resources: 96 | requests: 97 | memory: {{ .Values.pawspublic.renderer.requests.memory }} 98 | cpu: {{ .Values.pawspublic.renderer.requests.cpu }} 99 | limits: 100 | cpu: 2000m 101 | volumes: 102 | {{ if .Values.localdev.enabled }} 103 | - hostPath: 104 | path: /srv/paws/project/paws/userhomes 105 | {{ else }} 106 | - nfs: 107 | {{- with (index .Values.jupyterhub.hub.extraVolumes 0) }} 108 | server: {{ .nfs.server }} 109 | {{- end }} 110 | path: /srv/paws/project/paws/userhomes 111 | {{ end }} 112 | name: pawshomes 113 | --- 114 | apiVersion: autoscaling/v1 115 | kind: HorizontalPodAutoscaler 116 | metadata: 117 | name: renderer 118 | spec: 119 | scaleTargetRef: 120 | apiVersion: apps/v1 121 | kind: Deployment 122 | name: renderer 123 | minReplicas: 1 124 | maxReplicas: 6 125 | targetCPUUtilizationPercentage: 200 126 | --- 127 | apiVersion: v1 128 | kind: Service 129 | metadata: 130 | labels: 131 | name: paws-public 132 | name: paws-public 133 | spec: 134 | ports: 135 | - name: http 136 | port: 8000 137 | protocol: TCP 138 | targetPort: 8000 139 | selector: 140 | name: nbserve 141 | --- 142 | apiVersion: v1 143 | kind: Service 144 | metadata: 145 | labels: 146 | name: renderer 147 | name: renderer 148 | spec: 149 | ports: 150 | - name: http 151 | port: 8000 152 | protocol: TCP 153 | targetPort: 8000 154 | selector: 155 | name: renderer 156 | --- 157 | apiVersion: networking.k8s.io/v1 158 | kind: Ingress 159 | metadata: 160 | labels: 161 | name: paws-public-custom 162 | ingress.paws.wmcloud.org: public 163 
| annotations: 164 | name: paws-public-custom 165 | spec: 166 | ingressClassName: nginx 167 | rules: 168 | - host: {{ .Values.pawspublic.ingress.host | quote }} 169 | http: 170 | paths: 171 | - backend: 172 | service: 173 | name: paws-public 174 | port: 175 | number: 8000 176 | pathType: Prefix 177 | path: / 178 | {{ end }} 179 | -------------------------------------------------------------------------------- /paws/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # pawsPublicEnabled enables the anonymous viewing service for notebooks 4 | pawsPublicEnabled: true 5 | pawspublic: 6 | nbserve: 7 | image: 8 | name: quay.io/wikimedia-paws-prod/nbserve 9 | tag: pr-419 # nbserve tag managed by github actions 10 | # pawspublic.nbserve.image.template safely defines image:tag name in yaml 11 | template: "{{ .Values.pawspublic.nbserve.image.name}}:{{.Values.pawspublic.nbserve.image.tag }}" 12 | replicas: 1 13 | requests: 14 | memory: "20Mi" 15 | renderer: 16 | image: 17 | name: quay.io/wikimedia-paws-prod/renderer 18 | tag: pr-418 # renderer tag managed by github actions 19 | # pawspublic.renderer.image.template safely defines image:tag name in yaml 20 | template: "{{ .Values.pawspublic.renderer.image.name}}:{{.Values.pawspublic.renderer.image.tag }}" 21 | requests: 22 | cpu: "10m" # give a token amount for local dev 23 | memory: "10Mi" 24 | ingress: 25 | host: public.hub.paws.local 26 | paws: 27 | # frontPageEnabled switches the URL path of / to a redirect to paws.ingress.frontRedirect 28 | frontPageEnabled: true 29 | ingress: 30 | # paws.ingress.frontHost should be the domain the URL path of / uses to redirect to docs 31 | frontHost: paws.wmcloud.org 32 | # paws.ingress.frontRedirect should be the destination for URL path of / at paws.ingress.frontHost 33 | frontRedirect: https://wikitech.wikimedia.org/wiki/PAWS 34 | jupyterhub: 35 | prePuller: 36 | containerSecurityContext: 37 | runAsUser: 52771 38 | runAsGroup: 
52771 39 | hook: 40 | containerSecurityContext: 41 | runAsUser: 52771 42 | runAsGroup: 52771 43 | pause: 44 | containerSecurityContext: 45 | runAsUser: 52771 46 | runAsGroup: 52771 47 | proxy: 48 | chp: 49 | resources: 50 | requests: 51 | memory: "200Mi" 52 | cpu: .2 53 | containerSecurityContext: 54 | runAsUser: 52771 55 | runAsGroup: 52771 56 | # jupyterhub.proxy.secretToken is a valid dummy value for development 57 | secretToken: "23f542cc4b1af000e68088f1acc7ca8275a67cf496bae15ead6a79b8c6702597" 58 | service: 59 | nodePorts: 60 | http: 32611 61 | type: NodePort 62 | cull: 63 | timeout: 86400 64 | hub: 65 | config: 66 | # updated auth object for chart version 0.11.0+ this is the local dev values 67 | MWOAuthenticator: 68 | client_id: fea321f1b6b5aed9fa83d5362839cd3d 69 | client_secret: 6b17e5b87ae5ee893f5d4ba8b0e2377c6c0c3fcc 70 | mw_index_url: https://meta.wikimedia.org/w/index.php 71 | JupyterHub: 72 | authenticator_class: mediawiki 73 | db: 74 | # jupyterhub.hub.db values are overridden in Cloud VPS 75 | url: sqlite:// 76 | type: sqlite-pvc 77 | upgrade: true 78 | extraVolumes: 79 | - name: homes 80 | hostPath: 81 | path: /srv/paws/project 82 | - name: dumps 83 | hostPath: 84 | path: /mnt/public/dumps 85 | # Without this, dumps becomes inaccessible and can hang the host 86 | - name: dumps-src1 87 | hostPath: 88 | path: /mnt/nfs/dumps-clouddumps1001.wikimedia.org 89 | type: DirectoryOrCreate 90 | - name: dumps-src2 91 | hostPath: 92 | path: /mnt/nfs/dumps-clouddumps1002.wikimedia.org 93 | type: DirectoryOrCreate 94 | extraVolumeMounts: 95 | - name: homes 96 | mountPath: /data/project 97 | - name: dumps 98 | mountPath: /public/dumps 99 | readOnly: true 100 | - name: dumps-src1 101 | mountPath: /mnt/nfs/dumps-clouddumps1001.wikimedia.org 102 | readOnly: true 103 | - name: dumps-src2 104 | mountPath: /mnt/nfs/dumps-clouddumps1002.wikimedia.org 105 | readOnly: true 106 | extraConfig: 107 | fixLabels: | 108 | def fix_labels(spawner, pod): 109 | del 
pod.metadata.labels['hub.jupyter.org/username'] 110 | return pod 111 | 112 | c.KubeSpawner.modify_pod_hook = fix_labels 113 | 00-myConfig: | 114 | localdev = True 115 | 10-myConfig: | 116 | import hmac 117 | import hashlib 118 | import subprocess 119 | import os 120 | import json 121 | from oauthenticator.mediawiki import MWOAuthenticator 122 | from tornado import gen 123 | 124 | from tornado.escape import url_escape 125 | from tornado.httpclient import AsyncHTTPClient 126 | 127 | 128 | class Auth(MWOAuthenticator): 129 | enable_auth_state = True 130 | def normalize_username(self, username): 131 | return username 132 | 133 | async def refresh_user(self, user, handler=None): 134 | client = AsyncHTTPClient() 135 | try: 136 | response = await client.fetch(f"https://meta.wikimedia.org/w/api.php?action=query&format=json&formatversion=2&meta=globaluserinfo&guiuser={url_escape(user.name)}", 137 | user_agent="PAWS-authenticator/0.1 (https://phabricator.wikimedia.org/tag/paws/)" ) 138 | locked = bool(json.loads(response.body)['query']['globaluserinfo'].get("locked", False)) 139 | if locked: 140 | await user.spawner.stop(now=True) 141 | return False 142 | else: 143 | return True 144 | except Exception as e: 145 | self.log.error(f"Error checking for Wikimedia lock on user {user.name}: {e}") 146 | return False # Notebook cookies keep user logged in 147 | 148 | @gen.coroutine 149 | # more information about where this comes from found here: 150 | # https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html#kubespawner.KubeSpawner.volumes 151 | def pre_spawn_start(self, user, spawner): 152 | auth_state = yield user.get_auth_state() 153 | identity = auth_state['MEDIAWIKI_USER_IDENTITY'] 154 | spawner.environment['ACCESS_KEY'] = auth_state['ACCESS_TOKEN_KEY'] 155 | spawner.environment['ACCESS_SECRET'] = auth_state['ACCESS_TOKEN_SECRET'] 156 | spawner.environment['CLIENT_ID'] = self.client_id 157 | spawner.environment['CLIENT_SECRET'] = self.client_secret 158 | 
spawner.environment['USER'] = identity['username'] 159 | # Set rather than use .extend! 160 | # Since otherwise the volumes list will grow each time 161 | # the spawner stops and starts! 162 | 163 | homedir = '/data/project/paws/userhomes/{}'.format(identity['sub']) 164 | homenfs = '/srv/paws/project/paws/userhomes/{}'.format(identity['sub']) 165 | # Create the homedir so docker doesn't do it as root 166 | os.makedirs(homedir, mode=0o755, exist_ok=True) 167 | if localdev == True: 168 | spawner.volumes = [ 169 | { 170 | 'name': 'home', 171 | 'hostPath': { 'path': homenfs } 172 | }, 173 | { 174 | 'name': 'dumps', 175 | 'hostPath': { 'path': '/public/dumps' } 176 | }, 177 | { 178 | 'name': 'dumps-src1', 179 | 'hostPath': { 'path': '/mnt/nfs/dumps-clouddumps1001.wikimedia.org' } 180 | }, 181 | { 182 | 'name': 'dumps-src2', 183 | 'hostPath': { 'path': '/mnt/nfs/dumps-clouddumps1002.wikimedia.org' } 184 | } 185 | ] 186 | else: 187 | spawner.volumes = [ 188 | { 189 | 'name': 'home', 190 | 'nfs': { 'server': nfs_home, 'path': homenfs } 191 | }, 192 | { 193 | 'name': 'dumps', 194 | 'nfs': { 'server': dumps_src1, 'path': '/' } 195 | }, 196 | { 197 | 'name': 'dumps-src1', 198 | 'nfs': { 'server': dumps_src1, 'path': '/' } 199 | }, 200 | { 201 | 'name': 'dumps-src2', 202 | 'nfs': { 'server': dumps_src2, 'path': '/' } 203 | } 204 | ] 205 | 206 | spawner.volume_mounts = [ 207 | { 208 | 'name': 'home', 209 | 'mountPath': '/home/paws' 210 | }, 211 | { 212 | 'name': 'dumps', 213 | 'mountPath': '/public/dumps/public', 214 | 'readOnly': True 215 | }, 216 | { 217 | 'name': 'dumps-src1', 218 | 'mountPath': '/mnt/nfs/dumps-clouddumps1001.wikimedia.org', 219 | 'readOnly': True 220 | }, 221 | { 222 | 'name': 'dumps-src2', 223 | 'mountPath': '/mnt/nfs/dumps-clouddumps1002.wikimedia.org', 224 | 'readOnly': True 225 | }, 226 | ] 227 | 228 | c.OAuthenticator.admin_users = { 229 | "BDavis (WMF)", 230 | "VRook (WMF)", 231 | "ABorrero (WMF)", 232 | "NSkaggs (WMF)", 233 | "Andrewbogott", 234 | 
"Chicocvenancio" 235 | } 236 | c.OAuthenticator.allow_all = True 237 | c.JupyterHub.authenticator_class = Auth 238 | c.JupyterHub.authenticate_prometheus = False 239 | c.JupyterHub.logo_file = '/srv/jupyterhub/PAWS.svg' 240 | c.JupyterHub.template_vars = { 241 | 'announcement': ('' 242 | 'Welcome to PAWS. ' 243 | 'Please ' 245 | ' report any issues on Phabricator, you can also give feedback here' 246 | '') 247 | } 248 | extraEnv: 249 | USER: tools.paws 250 | JUPYTERHUB_CRYPT_KEY: "4849a4d92a49cdf9a80b49486293e29966c4f02daefa0f5597cf14546bab09f8" 251 | MYSQL_HMAC_KEY: "9a33d49db4bb823e87187a11e4f6296bee41bc35c41dc195634dff440c1870f0" 252 | cookieSecret: 827902ad187337f83adc565dadfb4c095ce1962442aae043ac78948f9b216a8f 253 | podSecurityContext: 254 | fsGroup: 52771 255 | image: 256 | name: quay.io/wikimedia-paws-prod/paws-hub 257 | tag: pr-448 # paws-hub tag managed by github actions 258 | containerSecurityContext: 259 | runAsUser: 52771 260 | resources: 261 | requests: 262 | memory: "200Mi" 263 | cpu: .2 264 | ingress: 265 | enabled: true 266 | hosts: 267 | - hub.paws.local 268 | ingressClassName: "nginx" 269 | # We are not on an autoscaling cluster, so we don't want this 270 | scheduling: 271 | userScheduler: 272 | enabled: false 273 | userPlaceholder: 274 | containerSecurityContext: 275 | runAsUser: 52771 276 | runAsGroup: 52771 277 | singleuser: 278 | cmd: 279 | - jupyterhub-singleuser 280 | fsGid: 52771 281 | image: 282 | name: quay.io/wikimedia-paws-prod/singleuser 283 | tag: pr-486 # singleuser tag managed by github actions 284 | pullPolicy: Always 285 | memory: 286 | guarantee: 0.70G 287 | limit: 3G 288 | cpu: 289 | guarantee: .15 290 | limit: 1 291 | storage: 292 | type: none 293 | uid: 52771 294 | # This must be false or this whole thing cannot work with restrictive PSP 295 | cloudMetadata: 296 | blockWithIptables: false 297 | extraEnv: 298 | HUB_DOMAIN: "hub.paws.local" # Check jupyterhub.ingress.hosts 299 | REFINE_DOMAIN: "*" # Check 
jupyterhub.ingress.hosts 300 | networkPolicy: 301 | egressAllowRules: 302 | privateIPs: true # Allow all connections to private IPs, needed for access to replicas 303 | nonPrivateIPs: false # Block all connections to non-private IPs, except the ones allowed below 304 | egress: 305 | # Allow connections to non-private IPs only for TCP ports 80 and 443 306 | # and for UDP ports 53 (DNS) and 123 (NTP) 307 | - ports: 308 | - protocol: TCP 309 | port: 80 310 | - protocol: TCP 311 | port: 443 312 | - protocol: UDP 313 | port: 53 314 | - protocol: UDP 315 | port: 123 316 | # mysql configures the wiki replica backend variables 317 | mysql: 318 | domain: "svc.cluster.local" 319 | username: s52771 320 | password: "iAmNotSecret0" 321 | minesweeper: 322 | enabled: false # most local-dev testers won't have the key to configs 323 | image: 324 | name: quay.io/wikimedia-paws-prod/minesweeper 325 | tag: pr-433 # minesweeper tag managed by github actions 326 | template: "{{ .Values.minesweeper.image.name }}:{{ .Values.minesweeper.image.tag }}" 327 | # If not deployed for prod use, we use some hacks for testing 328 | localdev: 329 | enabled: true 330 | image: 331 | name: quay.io/wikimedia-paws-prod/jobber 332 | tag: pr-420 # jobber tag managed by github actions 333 | # localdev.image.template safely defines image:tag name in yaml 334 | template: "{{ .Values.localdev.image.name}}:{{.Values.localdev.image.tag }}" 335 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 80 3 | target_version = ['py37'] 4 | -------------------------------------------------------------------------------- /secrets-codfw1dev.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/secrets-codfw1dev.sh 
-------------------------------------------------------------------------------- /secrets-eqiad1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/secrets-eqiad1.sh -------------------------------------------------------------------------------- /tests/helm-lint/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | curl \ 6 | ca-certificates 7 | 8 | COPY . /paws 9 | WORKDIR /paws 10 | 11 | RUN curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash 12 | RUN helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/ 13 | RUN helm repo add bitnami https://charts.bitnami.com/bitnami 14 | RUN helm dependency update paws/ 15 | 16 | 17 | ENTRYPOINT ["helm", "lint", "paws/"] 18 | -------------------------------------------------------------------------------- /tests/tox/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | tox 6 | 7 | COPY . 
/paws 8 | WORKDIR /paws 9 | 10 | 11 | ENTRYPOINT ["tox"] 12 | -------------------------------------------------------------------------------- /tofu/127a.tf: -------------------------------------------------------------------------------- 1 | resource "openstack_containerinfra_cluster_v1" "k8s_127a" { 2 | name = "paws${var.name[var.datacenter]}-127a" 3 | cluster_template_id = resource.openstack_containerinfra_clustertemplate_v1.template_127a.id 4 | master_count = 1 5 | node_count = var.workers[var.datacenter] 6 | } 7 | 8 | resource "local_file" "kube_config" { 9 | content = resource.openstack_containerinfra_cluster_v1.k8s_127a.kubeconfig.raw_config 10 | filename = "kube.config" 11 | } 12 | 13 | resource "openstack_containerinfra_clustertemplate_v1" "template_127a" { 14 | name = "paws${var.name[var.datacenter]}-127a" 15 | coe = "kubernetes" 16 | dns_nameserver = "8.8.8.8" 17 | docker_storage_driver = "overlay2" 18 | docker_volume_size = var.volume_size[var.datacenter] 19 | external_network_id = var.external_network_id[var.datacenter] 20 | fixed_subnet = var.fixed_subnet[var.datacenter] 21 | fixed_network = var.fixed_network[var.datacenter] 22 | flavor = var.worker_flavor[var.datacenter] 23 | floating_ip_enabled = "false" 24 | image = "Fedora-CoreOS-38" 25 | master_flavor = var.control_flavor[var.datacenter] 26 | network_driver = "calico" 27 | 28 | labels = { 29 | kube_tag = "v1.27.8-rancher2" 30 | container_runtime = "containerd" 31 | containerd_version = "1.6.28" 32 | containerd_tarball_sha256 = "f70736e52d61e5ad225f4fd21643b5ca1220013ab8b6c380434caeefb572da9b" 33 | cloud_provider_tag = "v1.27.3" 34 | cinder_csi_plugin_tag = "v1.27.3" 35 | k8s_keystone_auth_tag = "v1.27.3" 36 | magnum_auto_healer_tag = "v1.27.3" 37 | octavia_ingress_controller_tag = "v1.27.3" 38 | calico_tag = "v3.26.4" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /tofu/codfw1dev-backend.conf: 
-------------------------------------------------------------------------------- 1 | # https://github.com/hashicorp/terraform/issues/13022 2 | 3 | region = "codfw1dev" 4 | bucket = "d34805fb49a44a82a0b9668ad1d1227a:tofu-state" 5 | endpoint = "https://object.codfw1dev.wikimediacloud.org" 6 | key = "state/main" 7 | 8 | 9 | skip_region_validation = true 10 | skip_credentials_validation = true 11 | use_path_style = true 12 | -------------------------------------------------------------------------------- /tofu/eqiad1-backend.conf: -------------------------------------------------------------------------------- 1 | # https://github.com/hashicorp/terraform/issues/13022 2 | 3 | region = "eqiad1" 4 | bucket = "paws:tofu-state" 5 | endpoint = "https://object.eqiad1.wikimediacloud.org" 6 | key = "state/main" 7 | 8 | skip_region_validation = true 9 | skip_credentials_validation = true 10 | force_path_style = true 11 | -------------------------------------------------------------------------------- /tofu/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.6.0" 3 | backend "s3" {} 4 | required_providers { 5 | openstack = { 6 | source = "terraform-provider-openstack/openstack" 7 | version = "~> 1.51.0" 8 | } 9 | } 10 | } 11 | 12 | provider "openstack" { 13 | auth_url = var.auth-url[var.datacenter] 14 | tenant_id = var.tenant_id[var.datacenter] 15 | application_credential_id = var.application_credential_id[var.datacenter] 16 | application_credential_secret = var.application_credential_secret[var.datacenter] 17 | } 18 | -------------------------------------------------------------------------------- /tofu/secrets.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/tofu/secrets.tf -------------------------------------------------------------------------------- /tofu/vars.tf: 
-------------------------------------------------------------------------------- 1 | variable "datacenter" { 2 | type = string 3 | } 4 | 5 | # name codfw1dev artifacts with '-dev' names 6 | variable "name" { 7 | type = map(any) 8 | default = { 9 | "codfw1dev" = "-dev" 10 | "eqiad1" = "" 11 | } 12 | } 13 | 14 | # connection vars 15 | variable "auth-url" { 16 | type = map(any) 17 | default = { 18 | "codfw1dev" = "https://openstack.codfw1dev.wikimediacloud.org:25000" 19 | "eqiad1" = "https://openstack.eqiad1.wikimediacloud.org:25000" 20 | } 21 | } 22 | variable "tenant_id" { 23 | type = map(any) 24 | default = { 25 | "codfw1dev" = "pawsdev" 26 | "eqiad1" = "paws" 27 | } 28 | } 29 | variable "application_credential_id" { 30 | type = map(any) 31 | default = { 32 | "codfw1dev" = "6b404a11241446c7a52c04f39983eda6" 33 | "eqiad1" = "43edf67449c740538af78c1743cb72c3" 34 | } 35 | } 36 | 37 | # magnum vars 38 | variable "worker_flavor" { 39 | type = map(any) 40 | default = { 41 | "codfw1dev" = "g4.cores1.ram2.disk20" 42 | "eqiad1" = "g4.cores8.ram32.disk20" 43 | } 44 | } 45 | variable "control_flavor" { 46 | type = map(any) 47 | default = { 48 | "codfw1dev" = "g4.cores1.ram2.disk20" 49 | "eqiad1" = "g4.cores2.ram4.disk20" 50 | } 51 | } 52 | variable "volume_size" { 53 | type = map(any) 54 | default = { 55 | "codfw1dev" = "20" 56 | "eqiad1" = "80" 57 | } 58 | } 59 | variable "external_network_id" { 60 | type = map(any) 61 | default = { 62 | "codfw1dev" = "wan-transport-codfw" 63 | "eqiad1" = "wan-transport-eqiad" 64 | } 65 | } 66 | variable "fixed_network" { 67 | type = map(any) 68 | default = { 69 | "codfw1dev" = "lan-flat-cloudinstances2b" 70 | "eqiad1" = "lan-flat-cloudinstances2b" 71 | } 72 | } 73 | variable "fixed_subnet" { 74 | type = map(any) 75 | default = { 76 | "codfw1dev" = "cloud-instances2-b-codfw" 77 | "eqiad1" = "cloud-instances2-b-eqiad" 78 | } 79 | } 80 | variable "workers" { 81 | type = map(any) 82 | default = { 83 | "codfw1dev" = "2" 84 | "eqiad1" = "5" 85 | 
} 86 | } 87 | -------------------------------------------------------------------------------- /toolinfo.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "wikimedia-paws", 3 | "title" : "PAWS", 4 | "description" : "PAWS: A Web Shell (PAWS) is a Jupyter notebook deployment hosted by Wikimedia.\n\nA Jupyter notebook is a popular Open Source tool that allows to create and share documents that contain live code. You can use Jupyter notebooks to run scripts that help you perform essential technical tasks on wikis, use data to create visualizations, graphs, and more, and to write techinical documentation and tutorials that help others work on Wikimedia projects.\n\nJupyter notebooks are used both by experienced programmers and technically curious newcomers. There's no need to download software or set up a development environment. All of your work is done in your browser.\n\nOur deployment is using JupyterHub, which can serve live Jupyter notebooks to multiple users.", 5 | "url" : "https://hub.paws.wmcloud.org/", 6 | "keywords" : "paws, pywikibot, jupyter notebook, python", 7 | "author" : "Yuvi Panda", 8 | "repository" : "https://github.com/toolforge/paws", 9 | "for_wikis": ["*"], 10 | "icon": "https://commons.wikimedia.org/wiki/File:PAWS_(no_text).svg", 11 | "license": "MIT", 12 | "sponsor": ["Wikimedia Foundation"], 13 | "available_ui_languages": ["en"], 14 | "technology_used": ["Jupyter Notebook", "pywikibot", "Python 3", "R", "SPARQL"], 15 | "tool_type": "web app", 16 | "developer_docs_url": [ 17 | { 18 | "url": "https://wikitech.wikimedia.org/wiki/PAWS/PAWS_maintenance_and_administration", 19 | "language": "en" 20 | } 21 | ], 22 | "user_docs_url": [ 23 | { 24 | "language": "en", 25 | "url": "https://wikitech.wikimedia.org/wiki/PAWS" 26 | } 27 | ], 28 | "bugtracker_url": "https://phabricator.wikimedia.org/project/board/1648/", 29 | "_language": "en", 30 | "_schema": "/toolinfo/1.2.0" 31 | } 32 | 
-------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = flake8, black, yamllint 3 | minversion = 1.6 4 | skipsdist = True 5 | 6 | [default] 7 | basepython = python3 8 | 9 | [testenv:flake8] 10 | basepython = {[default]basepython} 11 | commands = flake8 images build.py 12 | deps = flake8 13 | 14 | [flake8] 15 | max-line-length = 100 16 | # E121, E123, E126, E133, E226, E241, E242, E704 and W503 are default ignores 17 | # E124: closing bracket does not match visual indentation 18 | # E203: non-pep8 compliant brainfart of flake8 19 | ignore = E121,E123,E126,E133,E203,E226,E241,E242,E704,W503,E124 20 | 21 | [testenv:black] 22 | description = check black formatter 23 | basepython = {[default]basepython} 24 | commands = black -l 80 -t py36 --check \ 25 | images \ 26 | build.py 27 | deps = black 28 | 29 | [testenv:yamllint] 30 | description = check yaml with yamllint 31 | basepython = {[default]basepython} 32 | commands = bash ./tox_scripts/yamllint.sh 33 | deps = yamllint 34 | allowlist_externals = bash 35 | -------------------------------------------------------------------------------- /tox_scripts/yamllint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # yamllint doesn't handle helm templates well, in particular {{- bits, so we 4 | # exclude them with some sed. In order to keep from making a mess of the working 5 | # directory of anyone running this test we copy them to a tmp directory. Then we 6 | # switch to that directory to do the test as it looks more like we are actually 7 | # running in the base dir, rather than displaying things like 8 | # /tmp/tmpdir/paws/values.yaml will show ./paws/values.yaml in output. 9 | 10 | export TEMP_DIR=$(mktemp -d -p "/tmp/") 11 | cp -r . ${TEMP_DIR} 12 | cd ${TEMP_DIR} 13 | find . 
-not -path "./.tox/*" -type f -regex ".*\.ya?ml" -exec sed -i "s/{{/# /" {} \; 14 | yamllint -c .yamllint.conf . 15 | --------------------------------------------------------------------------------