├── .flake8 ├── .gitattributes ├── .github └── workflows │ ├── jobber.yaml │ ├── linter.yaml │ ├── minesweeper.yaml │ ├── nbserve.yaml │ ├── notification.yaml │ ├── paws-hub.yaml │ ├── renderer.yaml │ ├── singleuser.yaml │ ├── tofu-fmt.yaml │ └── update-container-tags.yaml ├── .gitignore ├── .gitmodules ├── .yamllint.conf ├── LICENSE ├── README.md ├── ansible ├── ansible.cfg ├── files │ ├── csi-secret-cinderplugin.yaml.codfw1dev.crypt │ ├── csi-secret-cinderplugin.yaml.eqiad1.crypt │ └── sc.yaml ├── paws.yaml ├── templates │ └── prometheus-ingress.yaml.j2 └── vars │ ├── codfw1dev.yaml │ └── eqiad1.yaml ├── build.py ├── deploy.sh ├── images ├── jobber │ ├── Dockerfile │ └── README.md ├── minesweeper │ ├── Dockerfile │ ├── LICENSE │ ├── README.md │ ├── requirements.in │ └── requirements.txt ├── nbserve │ ├── Dockerfile │ ├── README.md │ └── robots.txt ├── paws-hub │ ├── Dockerfile │ ├── PAWS.svg │ ├── README.md │ └── paws-favicon.ico ├── renderer │ ├── Dockerfile │ ├── README.md │ ├── basic.tpl │ ├── full.tpl │ ├── renderer.py │ └── requirements.txt └── singleuser │ ├── Dockerfile │ ├── banner │ ├── hide_clusters_tab.css │ ├── install-extensions │ ├── install-julia │ ├── install-sql-tool │ ├── r │ ├── Rprofile.site │ ├── cran.gpg │ ├── cran.list │ └── rsession.conf │ ├── requirements.txt │ ├── user-config.py │ └── user-fixes.py ├── paws ├── .helmignore ├── Chart.yaml ├── codfw-secrets.yaml ├── codfw.yaml ├── files │ └── minesweeper │ │ ├── minesweeper.py │ │ └── secrets │ │ ├── ban.py │ │ ├── herorat.py │ │ └── minesweeper.yaml ├── production.yaml ├── secrets.yaml ├── templates │ ├── NOTES.txt │ ├── frontpage.yaml │ ├── localdev.yaml │ ├── minesweeper │ │ ├── configmap.yaml │ │ ├── daemonset.yaml │ │ └── rbac.yaml │ ├── nbserve │ │ └── nginx.yaml │ └── public.yaml └── values.yaml ├── pyproject.toml ├── secrets-codfw1dev.sh ├── secrets-eqiad1.sh ├── tests ├── helm-lint │ └── Dockerfile └── tox │ └── Dockerfile ├── tofu ├── 127a.tf ├── codfw1dev-backend.conf ├── 
eqiad1-backend.conf ├── main.tf ├── secrets.tf └── vars.tf ├── toolinfo.json ├── tox.ini └── tox_scripts └── yamllint.sh /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | per-file-ignores = 4 | images/nbserve/nginx.py: E501 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Directory entries are not enough to encrypt files beneath it 2 | # https://github.com/AGWA/git-crypt#gitattributes-file 3 | paws/secrets.yaml filter=git-crypt diff=git-crypt 4 | ansible/files/csi-secret-cinderplugin.yaml.eqiad1.crypt filter=git-crypt diff=git-crypt 5 | ansible/files/csi-secret-cinderplugin.yaml.codfw1dev.crypt filter=git-crypt diff=git-crypt 6 | paws/codfw-secrets.yaml filter=git-crypt diff=git-crypt 7 | paws/files/minesweeper/secrets/** filter=git-crypt diff=git-crypt 8 | tofu/secrets.tf filter=git-crypt diff=git-crypt 9 | secrets-eqiad1.sh filter=git-crypt diff=git-crypt 10 | secrets-codfw1dev.sh filter=git-crypt diff=git-crypt 11 | -------------------------------------------------------------------------------- /.github/workflows/jobber.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push jobber 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/jobber/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push jobber 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: jobber 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/linter.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: "linter" 4 | 
5 | 'on': 6 | pull_request: 7 | 8 | jobs: 9 | tox: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: build tox container 15 | run: | 16 | docker build -f tests/tox/Dockerfile -t tox:01 . 17 | - name: run tox container 18 | run: | 19 | docker run tox:01 20 | 21 | helm-lint: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v2 25 | 26 | - name: build helm-lint container 27 | run: | 28 | docker build -f tests/helm-lint/Dockerfile -t helm-lint:01 . 29 | - name: run helm-lint container 30 | run: | 31 | docker run helm-lint:01 32 | -------------------------------------------------------------------------------- /.github/workflows/minesweeper.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push minesweeper 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/minesweeper/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push minesweeper 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: minesweeper 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/nbserve.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push nbserve 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/nbserve/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push nbserve 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: nbserve 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/notification.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: "PR Notification" 4 | 5 | 'on': 6 | pull_request: 7 | types: [opened, closed] 8 | 9 | jobs: 10 | irc-notification: 11 | runs-on: ubuntu-latest 12 | name: Notify on IRC 13 | steps: 14 | - name: irc notification 15 | run: | 16 | sudo apt-get update 17 | sudo apt-get install netcat-traditional -y 18 | message="${{ github.actor }} ${{ github.event.action }} ${{ github.event.pull_request._links.html.href }}" 19 | echo -e "USER notefromgithub notefromgithub notefromgithub :notefromgithub\nNICK notefromgithub\nJOIN #wikimedia-cloud-feed\nPRIVMSG #wikimedia-cloud-feed :${message}\nQUIT\n" | nc irc.libera.chat 6667 20 | phabricator-comment: 21 | runs-on: ubuntu-latest 22 | name: Comment on Phabricator 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: post to phabricator 26 | run: | 27 | message="${{ github.actor }} ${{ github.event.action }} ${{ github.event.pull_request._links.html.href }}" 28 | echo "${message}" 29 | inital_commit=$(git log origin/main..origin/${{ github.head_ref }} --pretty=%H | tail -n1) 30 | task=$(curl ${{ github.event.pull_request._links.commits.href }} | jq .[0].commit.message -r | grep "^Bug: T[0-9]*$" | head -1 | awk '{print $2}') 31 | 32 | if [ -n "${task}" ]; then 33 | curl https://phabricator.wikimedia.org/api/maniphest.edit \ 34 | -d api.token=${{ secrets.TOOLFORGE_PHAB_BOT_KEY }} \ 35 | -d transactions[0][type]=comment \ 36 | -d transactions[0][value]="${message}" \ 37 | -d objectIdentifier=${task} 38 | fi 39 | -------------------------------------------------------------------------------- /.github/workflows/paws-hub.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push paws-hub 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/paws-hub/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push paws-hub 13 | uses: 
toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: paws-hub 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/renderer.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push renderer 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/renderer/** 9 | 10 | jobs: 11 | build-and-push: 12 | name: build and push renderer 13 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 14 | with: 15 | imagename: renderer 16 | secrets: 17 | quay_user: ${{ secrets.QUAY_USER }} 18 | quay_password: ${{ secrets.QUAY_PASSWORD }} 19 | -------------------------------------------------------------------------------- /.github/workflows/singleuser.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: build and push singleuser 4 | 5 | 'on': 6 | pull_request_target: 7 | paths: 8 | - images/singleuser/** 9 | 10 | 11 | jobs: 12 | build-and-push: 13 | name: build and push singleuser 14 | uses: toolforge/github-actions/.github/workflows/build-and-push.yaml@build-and-push-v4 15 | with: 16 | imagename: singleuser 17 | secrets: 18 | quay_user: ${{ secrets.QUAY_USER }} 19 | quay_password: ${{ secrets.QUAY_PASSWORD }} 20 | -------------------------------------------------------------------------------- /.github/workflows/tofu-fmt.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: "tofu fmt" 4 | 5 | 'on': 6 | pull_request: 7 | 8 | jobs: 9 | tf-fmt: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: install tofu 15 | run: | 16 | sudo apt-get update && sudo apt-get install -y gnupg software-properties-common 17 | curl -s 
https://packagecloud.io/install/repositories/opentofu/tofu/script.deb.sh?any=true -o /tmp/tofu-repository-setup.sh 18 | sudo bash /tmp/tofu-repository-setup.sh 19 | sudo apt install tofu 20 | - name: tofu fmt 21 | run: | 22 | shopt -s extglob 23 | cd tofu 24 | if ! tofu fmt -check -diff !(secrets).tf ; then 25 | echo "please update your tofu code to match the above."; 26 | echo 'or run `tofu fmt` to have tofu reformat it.'; 27 | exit 1; 28 | fi 29 | -------------------------------------------------------------------------------- /.github/workflows/update-container-tags.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: update container tags 4 | 5 | 'on': 6 | pull_request_target: 7 | 8 | jobs: 9 | update-container-tags: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | with: 14 | token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 15 | repository: ${{ github.event.pull_request.head.repo.full_name }} 16 | 17 | - name: git fetch 18 | run: | 19 | git fetch 20 | - name: git checkout 21 | run: | 22 | git checkout ${{ github.head_ref }} 23 | 24 | # compare to upstream main branch 25 | - name: git add remote 26 | run: | 27 | git remote add toolforgepaws https://github.com/toolforge/paws.git 28 | - name: git remote update 29 | run: | 30 | git remote update 31 | 32 | - name: update values.yaml 33 | run: | 34 | for i in renderer nbserve paws-hub jobber singleuser minesweeper ; do 35 | if [[ $(git diff remotes/toolforgepaws/main -- images/${i}/) ]]; then 36 | sed -i "s/tag: .* # ${i} tag managed by github actions$/tag: pr-${{ github.event.number }} # ${i} tag managed by github actions/" paws/values.yaml 37 | fi 38 | done 39 | 40 | - uses: EndBug/add-and-commit@v7 41 | with: 42 | add: 'paws/values.yaml' 43 | author_name: Github Action 44 | author_email: auto@github.com 45 | branch: ${{ github.head_ref }} 46 | message: 'auto update of ${{ inputs.imagename }} tag' 47 | pull: --rebase --autostash 48 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .vscode/ 3 | __pycache__/ 4 | *.pyc 5 | .mypy_cache/ 6 | charts/ 7 | requirements.lock 8 | Chart.lock 9 | dev-values.yaml 10 | *.swp 11 | terraform.tfstate* 12 | .terraform* 13 | tofu/kube.config 14 | .venv* 15 | ansible/collections/* 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/.gitmodules -------------------------------------------------------------------------------- /.yamllint.conf: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | yaml-files: 4 | - '*.yaml' 5 | - '*.yml' 6 | - '.yamllint' 7 | 8 | rules: 9 | # with all the templates braces get weird 10 | braces: disable 11 | brackets: enable 12 | colons: enable 13 | commas: enable 14 | comments: 15 | level: warning 16 | min-spaces-from-content: 1 17 | comments-indentation: 18 | level: warning 19 | document-end: disable 20 | document-start: 21 | level: warning 22 | empty-lines: enable 23 | empty-values: disable 24 | hyphens: enable 25 | indentation: enable 26 | key-duplicates: enable 27 | key-ordering: disable 28 | line-length: disable 29 | new-line-at-end-of-file: enable 30 | new-lines: enable 31 | octal-values: disable 32 | quoted-strings: disable 33 | trailing-spaces: enable 34 | truthy: 35 | level: warning 36 | 37 | # don't try to parse encrypted files. 
Gives error like: 38 | # UnicodeDecodeError: 'utf-8' codec can't decode byte 0x99 in position 10: invalid start byte 39 | ignore: | 40 | paws/secrets.yaml 41 | paws/codfw-secrets.yaml 42 | paws/files/minesweeper/secrets/ 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Yuvi Panda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PAWS 2 | 3 | PAWS: A Web Shell (PAWS) is a Jupyter notebooks deployment that has been customized to make interacting with Wikimedia wikis easier. 
It allows users to create and share documents that contain live code, visualizations such as graphs, rich text, etc. The user created notebooks are a powerful tool that enables data analysis and scientific research, and also transforms the way in which programmers write code - by enabling an exploratory environment with a quick feedback loop, and a low barrier for entry through its easy-to-use graphical interface. 4 | 5 | ## Contributing 6 | 7 | Bugs, issues and feature requests are found on [Wikimedia Foundation's Phabricator](https://phabricator.wikimedia.org/). 8 | There is a [workboard](https://phabricator.wikimedia.org/project/view/1648/) and a project tag of `#paws` to use for related work. You can reference code and commits from this repo at the Phabricator mirror of the code [here](https://phabricator.wikimedia.org/diffusion/PAWS/browse/main/), but please do not clone or try to use that mirror directly. 9 | 10 | To contribute to this project's code, please fork the repo on [GitHub](https://github.com/toolforge/paws/) and submit a pull request. 11 | 12 | If you have push access to the project, we ask that new changes be reviewed by one other 13 | project member by using either a feature branch on the https://github.com/toolforge/paws repo 14 | to trigger a pull request or using a fork to set up a pull request. 15 | 16 | ### Pull Requests and CI 17 | 18 | When a pull request is opened a few things are run automatically. Any container that was modified in /images will be built and pushed to quay.io. Your branch will be updated with an additional commit, updating the values.yaml file to point to the new image tags. And a linter will be run. These workflows, and their status, will be visible in the github pull request page. At this point you, or anyone else, will be able to pull down the branch in the PR and run it locally in minikube as described below. 
19 | 20 | If your PR originates from a fork, please be sure "Allow edits and access to secrets by maintainers" is enabled such that the CI can function. Alternatively please manually edit the values.yaml to match the PR number for any containers that your code updates. 21 | 22 | ### Setting up a development environment 23 | 24 | It is possible to run a fully-functioning PAWS system inside [minikube](https://minikube.sigs.k8s.io/docs/)! You don't need 25 | access to the secrets.yaml file to do it either, since the defaults mostly support it. 26 | 27 | You will need to install minikube (tested on minikube v1.33.1) and [helm](https://helm.sh) and kubectl on your system. When you are confident those are working, start minikube with: 28 | - `minikube start --kubernetes-version=v1.27.8` 29 | - `minikube addons enable ingress` 30 | (from the top level of this repo): 31 | install the dependencies for the PAWS dev environment with these steps: 32 | - `helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/` 33 | - `helm dep up paws/` 34 | - `kubectl create namespace paws-dev` 35 | - `helm -n paws-dev upgrade --install dev paws/ --timeout=50m` 36 | 37 | The rest of the setup instructions will display on screen as long as the install is successful. 38 | Please refer to the helm documentation from there. 39 | 40 | If you are experiencing issues with the installation, you can try changing the driver configuration in minikube: https://minikube.sigs.k8s.io/docs/drivers/ 41 | 42 | - First delete the current cluster: 43 | 44 | `minikube delete` 45 | 46 | - Start a new cluster with the driver you want to use (e.g. 
docker, virtualbox, hyperkit, etc.): 47 | 48 | `minikube start --driver=docker --kubernetes-version=v1.20.11` 49 | 50 | Another possible solution if minikube is acting weird might be to upgrade minikube, or even to 51 | increase the default memory: 52 | 53 | `minikube config set memory 4096` 54 | 55 | #### Working with images 56 | Keep in mind that opening a PR will, attempt to, build any image that has changed in the PR branch. This method is fine to build and test the resulting container. Though it is often easier to build, and rebuild, a container locally for testing. The following describes how to build and use a container locally. 57 | 58 | There are 8 images that are part of PAWS, in particular in the images/ directory. If you start a dev environment, it will pull those images from quay.io by default, just like in Wikimedia Cloud Services. If you are making changes to the images and testing those locally, you'll need to build them and tag them for your local environment, possibly setting them in your local values file with the tags you set. 59 | 60 | If you are using minikube, you need to make sure you are using minikube's docker, not your system's docker with `eval $(minikube docker-env)`. Now your docker commands will operate on the minikube environment. 61 | 62 | For example, let's say you wanted to update the singleuser image (which is the actual notebook server image): 63 | - `cd images/singleuser` 64 | - `docker build -t tag-you-are-going-to-use:whatever .` 65 | 66 | And then you should have the image with a tag of `tag-you-are-going-to-use:whatever` that you could edit into your values.yaml file for local helm work. 67 | ## Useful libraries 68 | ### Accessing Database Replicas With Pandas and Sqlalchemy 69 | 70 | Pandas is a lovely high level library for in-memory data manipulations. 
In order to get the result of a SQL query as a pandas dataframe use: 71 | ``` 72 | from sqlalchemy import create_engine 73 | import sys, os 74 | import pandas as pd 75 | 76 | constr = 'mysql+pymysql://{user}:{pwd}@{host}'.format(user=os.environ['MYSQL_USERNAME'], 77 | pwd=os.environ['MYSQL_PASSWORD'], 78 | host=os.environ['MYSQL_HOST']) 79 | con = create_engine(constr) 80 | 81 | df = pd.read_sql('select * from plwiki_p.logging limit 10', con) 82 | ``` 83 | 84 | ### Storage space 85 | Publishing space 86 | 87 | A notebook can be turned into a public notebook by publishing a link to it. This works as the notebook is made available in a read only mode. An example might be …revisions-sql.ipynb?kernel_name=python3. It could be wise to add the kernel name to the link, even if it isn't necessary in some cases. 88 | 89 | If you want to run the copy yourself, or do interactive changes, you must download the notebook and reupload on your own account. Downloading the raw format of the previous example can be done by adding format=raw to the previous example …revisions-sql.ipynb?format=raw. This download-reupload-process is somewhat awkward. 90 | 91 | Note that a notebook will always be published, as the link can be guessed, so don't add any private information. 92 | 93 | ### Running tests locally 94 | PAWS tests are managed through docker. You can find all of the tests in the tests directory. If you want to build and run them locally run: 95 | `docker build -f tests/<test-dir>/Dockerfile -t test:01 . ; docker run test:01` 96 | Where `<test-dir>` is any of the directories in tests. For example: 97 | `docker build -f tests/tox/Dockerfile -t test:01 . ; docker run test:01` 98 | Would run tox. 
99 | 100 | ### To know more about PAWS have a look at: 101 | https://wikitech.wikimedia.org/wiki/PAWS 102 | 103 | ### Comment to Phabricator 104 | To have a PR make comments to an associated phabricator ticket have the last line of the commit look like: 105 | 106 | Bug: 107 | 108 | For example: 109 | Bug: T318182 110 | 111 | ### Deployment ### 112 | ``` 113 | bash deploy.sh 114 | ``` 115 | 116 | update the web proxy in horizon to point to current cluster. 117 | 118 | https://wikitech.wikimedia.org/wiki/PAWS/Admin#Deployment 119 | 120 | #### Disaster Recovery 121 | If the entire project is removed two parts of paws are not managed by tofu/ansible. 122 | Object storage container: An object storage container named "tofu-state" will need to be generated in horizon. This is where the state file for tofu resides. 123 | NFS: The NFS server is not included. And a fresh NFS server will be needed for paws to operate. 124 | 125 | # backup prometheus 126 | see ansible/files/prometheus-data.sh for example of backup/restore 127 | -------------------------------------------------------------------------------- /ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | # Better error output 3 | stdout_callback=debug 4 | stderr_callback=debug 5 | 6 | collections_path=./collections/ansible_collections 7 | 8 | # we're only using localhost, no need for the warning. 
9 | localhost_warning=False 10 | 11 | [inventory] 12 | # Only using localhost, so no inventory 13 | inventory_unparsed_warning=False 14 | -------------------------------------------------------------------------------- /ansible/files/csi-secret-cinderplugin.yaml.codfw1dev.crypt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/ansible/files/csi-secret-cinderplugin.yaml.codfw1dev.crypt -------------------------------------------------------------------------------- /ansible/files/csi-secret-cinderplugin.yaml.eqiad1.crypt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/ansible/files/csi-secret-cinderplugin.yaml.eqiad1.crypt -------------------------------------------------------------------------------- /ansible/files/sc.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: storage.k8s.io/v1 4 | kind: StorageClass 5 | metadata: 6 | name: standard 7 | annotations: 8 | storageclass.kubernetes.io/is-default-class: "true" 9 | provisioner: cinder.csi.openstack.org 10 | parameters: 11 | availability: nova 12 | -------------------------------------------------------------------------------- /ansible/paws.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Deploy PAWS to k8s cluster 4 | gather_facts: false 5 | hosts: localhost 6 | tasks: 7 | - name: include env vars 8 | include_vars: vars/{{ datacenter }}.yaml 9 | 10 | - name: Temporary directory for cloud provider 11 | ansible.builtin.tempfile: 12 | state: directory 13 | suffix: paws 14 | register: paws_dir 15 | changed_when: False 16 | 17 | - name: Clone cloud-provider-openstack 18 | ansible.builtin.git: 19 | repo: 
https://github.com/kubernetes/cloud-provider-openstack.git 20 | dest: "{{ paws_dir.path }}" 21 | version: v1.26.3 22 | changed_when: False 23 | 24 | - name: Copy file with owner and permissions 25 | ansible.builtin.copy: 26 | src: files/csi-secret-cinderplugin.yaml.{{ datacenter }}.crypt 27 | dest: "{{ paws_dir.path }}/manifests/cinder-csi-plugin/csi-secret-cinderplugin.yaml" 28 | changed_when: False 29 | 30 | - name: install the things! 31 | kubernetes.core.k8s: 32 | state: present 33 | src: "{{ paws_dir.path }}/manifests/cinder-csi-plugin/{{ item }}" 34 | loop: 35 | - "csi-secret-cinderplugin.yaml" 36 | - "cinder-csi-controllerplugin-rbac.yaml" 37 | - "cinder-csi-controllerplugin.yaml" 38 | - "cinder-csi-nodeplugin-rbac.yaml" 39 | - "cinder-csi-nodeplugin.yaml" 40 | - "csi-cinder-driver.yaml" 41 | 42 | - name: Delete temporary directory 43 | ansible.builtin.file: 44 | state: absent 45 | path: "{{ paws_dir.path }}" 46 | changed_when: False 47 | 48 | - name: and sc.yaml 49 | kubernetes.core.k8s: 50 | state: present 51 | src: "files/sc.yaml" 52 | 53 | - name: Deploy ingress-nginx 54 | kubernetes.core.helm: 55 | name: ingress-nginx 56 | chart_ref: ingress-nginx 57 | chart_repo_url: https://kubernetes.github.io/ingress-nginx 58 | chart_version: 4.12.1 59 | release_namespace: ingress-nginx 60 | create_namespace: true 61 | set_values: 62 | - value: controller.service.type=NodePort 63 | value_type: string 64 | - value: controller.service.enableHttps=false 65 | value_type: string 66 | - value: controller.service.nodePorts.http=30001 67 | value_type: string 68 | - value: controller.config.proxy-body-size=4m 69 | value_type: string 70 | - value: controller.config.allow-snippet-annotations=true 71 | value_type: string 72 | 73 | - name: Add jupyterhub chart repo 74 | kubernetes.core.helm_repository: 75 | name: jupyterhub 76 | repo_url: "https://jupyterhub.github.io/helm-chart/" 77 | 78 | - name: Deploy paws 79 | kubernetes.core.helm: 80 | name: paws 81 | chart_ref: ../paws 82 
| release_namespace: "{{ namespace }}" 83 | create_namespace: true 84 | values_files: 85 | - ../paws/{{ secret_file }}.yaml 86 | - ../paws/{{ env_file }}.yaml 87 | timeout: '50m' 88 | dependency_update: true 89 | 90 | - name: Create metrics namespace for prometheus 91 | kubernetes.core.k8s: 92 | name: metrics 93 | kind: Namespace 94 | state: present 95 | 96 | - name: Prometheus 97 | kubernetes.core.helm: 98 | name: prometheus 99 | chart_ref: prometheus 100 | chart_repo_url: https://prometheus-community.github.io/helm-charts 101 | chart_version: 25.26.0 102 | release_namespace: metrics 103 | create_namespace: true 104 | set_values: 105 | - value: prometheus.retention=30d 106 | value_type: string 107 | 108 | - name: Ingress for prometheus 109 | kubernetes.core.k8s: 110 | state: present 111 | template: "templates/prometheus-ingress.yaml.j2" 112 | namespace: metrics 113 | -------------------------------------------------------------------------------- /ansible/templates/prometheus-ingress.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: prometheus 5 | spec: 6 | ingressClassName: nginx 7 | rules: 8 | {% if datacenter == 'codfw1dev' %} 9 | - host: prometheus-paws.codfw1dev.wmcloud.org 10 | {% else %} 11 | - host: prometheus-paws.wmcloud.org 12 | {% endif %} 13 | http: 14 | paths: 15 | - backend: 16 | service: 17 | name: prometheus-server 18 | port: 19 | number: 80 20 | path: / 21 | pathType: Prefix 22 | -------------------------------------------------------------------------------- /ansible/vars/codfw1dev.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | secret_file: 'codfw-secrets' 4 | namespace: 'codfw1dev' 5 | env_file: 'codfw' 6 | -------------------------------------------------------------------------------- /ansible/vars/eqiad1.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | 3 | secret_file: 'secrets' 4 | namespace: 'prod' 5 | env_file: 'production' 6 | -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import os 3 | import argparse 4 | import subprocess 5 | 6 | 7 | def last_git_modified(path, n=1): 8 | return ( 9 | subprocess.check_output( 10 | ["git", "log", "-n", str(n), "--pretty=format:%h", path] 11 | ) 12 | .decode("utf-8") 13 | .split("\n")[-1] 14 | ) 15 | 16 | 17 | def image_touched(image, commit_range): 18 | return ( 19 | subprocess.check_output( 20 | [ 21 | "git", 22 | "diff", 23 | "--name-only", 24 | commit_range, 25 | os.path.join("images", image), 26 | ] 27 | ) 28 | .decode("utf-8") 29 | .strip() 30 | != "" 31 | ) 32 | 33 | 34 | def build_images(prefix, images, commit_range=None, push=False): 35 | for image in images: 36 | if commit_range: 37 | if not image_touched(image, commit_range): 38 | print( 39 | "Skipping {}, not touched in {}".format(image, commit_range) 40 | ) 41 | continue 42 | 43 | # Pull last available version of image to maximize cache use 44 | try_count = 0 45 | while try_count < 50: 46 | last_image_tag = last_git_modified( 47 | os.path.join("images", image), try_count + 2 48 | ) 49 | last_image_spec = "{}{}:{}".format(prefix, image, last_image_tag) 50 | try: 51 | subprocess.check_call(["docker", "pull", last_image_spec]) 52 | break 53 | except subprocess.CalledProcessError: 54 | try_count += 1 55 | pass 56 | image_path = os.path.join("images", image) 57 | tag = last_git_modified(image_path) 58 | image_spec = "{}{}:{}".format(prefix, image, tag) 59 | 60 | subprocess.check_call( 61 | [ 62 | "docker", 63 | "build", 64 | "-t", 65 | image_spec, 66 | "--cache-from", 67 | last_image_spec, 68 | image_path, 69 | ] 70 | ) 71 | if push: 72 | subprocess.check_call(["docker", 
"push", image_spec]) 73 | 74 | 75 | def deploy(prefix, images, release, install): 76 | image_map = { 77 | "paws-hub": "jupyterhub.hub.image", 78 | "singleuser": "jupyterhub.singleuser.image", 79 | } 80 | 81 | args = [] 82 | 83 | # Set up helm! 84 | subprocess.check_call(["helm", "init", "--client-only"]) 85 | subprocess.check_call( 86 | [ 87 | "helm", 88 | "repo", 89 | "add", 90 | "jupyterhub", 91 | "https://jupyterhub.github.io/helm-chart", 92 | ] 93 | ) 94 | subprocess.check_call(["helm", "dep", "up"], cwd="paws") 95 | 96 | for image in images: 97 | image_path = os.path.join("images", image) 98 | image_name = prefix + image 99 | tag = last_git_modified(image_path) 100 | args.append("--set={}.name={}".format(image_map[image], image_name)) 101 | args.append("--set={}.tag={}".format(image_map[image], tag)) 102 | 103 | if install: 104 | helm = [ 105 | "helm", 106 | "install", 107 | "--name", 108 | release, 109 | "--namespace", 110 | release, 111 | "paws/", 112 | "-f", 113 | "paws/secrets.yaml", 114 | ] 115 | else: 116 | helm = ["helm", "upgrade", release, "paws/", "-f", "paws/secrets.yaml"] 117 | 118 | subprocess.check_call(helm + args) 119 | 120 | 121 | def main(): 122 | argparser = argparse.ArgumentParser() 123 | argparser.add_argument( 124 | "--image-prefix", default="quay.io/wikimedia-paws-prod/" 125 | ) 126 | subparsers = argparser.add_subparsers(dest="action") 127 | 128 | build_parser = subparsers.add_parser( 129 | "build", description="Build & Push images" 130 | ) 131 | build_parser.add_argument( 132 | "--commit-range", 133 | help="Range of commits to consider when building images", 134 | ) 135 | build_parser.add_argument("--push", action="store_true") 136 | 137 | deploy_parser = subparsers.add_parser( 138 | "deploy", description="Deploy with helm" 139 | ) 140 | deploy_parser.add_argument("release", default="prod") 141 | deploy_parser.add_argument("--install", action="store_true") 142 | 143 | args = argparser.parse_args() 144 | 145 | images = ["paws-hub", 
#!/bin/bash

# Deploy PAWS to a datacenter: tofu (infra) first, then ansible/helm.
# Usage: ./deploy.sh <eqiad1|codfw1dev> [tofu]
#   passing "tofu" as the second argument stops after the tofu apply.

set -e

if [ "${1}" = 'eqiad1' ] || [ "${1}" = 'codfw1dev' ]
then
    datacenter="${1}"
else
    echo "Please enter datacenter."
    echo "Usage:"
    # the old message printed no placeholder at all
    echo "${0} <eqiad1|codfw1dev> [tofu]"
    # exit non-zero so callers can detect the usage error
    exit 1
fi

if [ "${2}" = 'tofu' ]
then
    # exit after tofu
    tofuonly=1
fi


if ! command -v kubectl ; then
    echo "please install kubectl"
    exit 1
fi

if ! command -v helm ; then
    echo "please install helm"
    exit 1
fi

if ! command -v tofu ; then
    echo "please install tofu"
    exit 1
fi

# provides ACCESS_KEY / SECRET_KEY for the tofu S3-compatible backend
source "secrets-${datacenter}.sh"

python3 -m venv .venv/deploy
source .venv/deploy/bin/activate
pip install ansible==10.3.0 kubernetes==26.1.0
# install helm diff. Needed to keep helm module idempotent
helm plugin install https://github.com/databus23/helm-diff || true

cd tofu
AWS_ACCESS_KEY_ID=${ACCESS_KEY} AWS_SECRET_ACCESS_KEY=${SECRET_KEY} tofu init -backend-config="${datacenter}-backend.conf"
AWS_ACCESS_KEY_ID=${ACCESS_KEY} AWS_SECRET_ACCESS_KEY=${SECRET_KEY} tofu apply -var datacenter="${datacenter}"
KUBECONFIG="$(pwd)/kube.config"
export KUBECONFIG

if [ "${tofuonly}" = '1' ]
then
    exit 0
fi

cd ../ansible
# install collections here to take advantage of ansible.cfg configs
ansible-galaxy collection install -U kubernetes.core -p ./collections

ansible-playbook paws.yaml --extra-vars "datacenter=${datacenter}"
9 | ENV PYTHONUNBUFFERED=1 10 | -------------------------------------------------------------------------------- /images/minesweeper/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, JupyterHub 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /images/minesweeper/README.md: -------------------------------------------------------------------------------- 1 | # minesweeper docker image 2 | 3 | docker image with basic dependencies for admin tasks on a kubernetes cluster 4 | (ps, python, python-psutil, python-kubernetes) 5 | -------------------------------------------------------------------------------- /images/minesweeper/requirements.in: -------------------------------------------------------------------------------- 1 | kubernetes 2 | -------------------------------------------------------------------------------- /images/minesweeper/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=./requirements.txt ./requirements.in 6 | # 7 | cachetools==5.3.1 8 | # via google-auth 9 | certifi==2024.7.4 10 | # via 11 | # kubernetes 12 | # requests 13 | charset-normalizer==3.2.0 14 | # via requests 15 | google-auth==2.22.0 16 | # via kubernetes 17 | idna==3.7 18 | # via requests 19 | kubernetes==27.2.0 20 | # via -r requirements.in 21 | oauthlib==3.2.2 22 | # via 23 | # kubernetes 24 | # requests-oauthlib 25 | pyasn1==0.5.0 26 | # via 27 | # pyasn1-modules 28 | # rsa 29 | pyasn1-modules==0.3.0 30 | # via google-auth 31 | python-dateutil==2.8.2 32 | # via kubernetes 33 | pyyaml==6.0.1 34 | # via kubernetes 35 | requests==2.32.0 36 | # via 37 | # kubernetes 38 | # requests-oauthlib 39 | requests-oauthlib==1.3.1 40 | # via kubernetes 41 | rsa==4.9 42 | # via google-auth 43 | six==1.16.0 44 | # via 45 | # google-auth 46 | # kubernetes 47 | # python-dateutil 48 | urllib3==1.26.19 49 | # via 50 | # google-auth 51 | # kubernetes 52 | # requests 53 | websocket-client==1.6.1 54 | # via kubernetes 55 | 
-------------------------------------------------------------------------------- /images/nbserve/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update \ 5 | && apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | curl \ 8 | libgd-dev \ 9 | libpcre3-dev \ 10 | libssl-dev \ 11 | luarocks \ 12 | make \ 13 | perl \ 14 | unzip \ 15 | ca-certificates \ 16 | git \ 17 | libxml2-dev \ 18 | libxslt1-dev \ 19 | python3 \ 20 | && apt-get clean \ 21 | && rm -rf /var/lib/apt/lists/* 22 | 23 | ARG RESTY_VERSION="1.25.3.1" 24 | ARG RESTY_J="1" 25 | # ARG RESTY_OPENSSL_VERSION="1.1.1d" 26 | ARG RESTY_CONFIG_OPTIONS="\ 27 | --with-compat \ 28 | --with-file-aio \ 29 | --with-http_addition_module \ 30 | --with-http_auth_request_module \ 31 | --with-http_dav_module \ 32 | --with-http_flv_module \ 33 | --with-http_gunzip_module \ 34 | --with-http_gzip_static_module \ 35 | --with-http_image_filter_module=dynamic \ 36 | --with-http_mp4_module \ 37 | --with-http_random_index_module \ 38 | --with-http_realip_module \ 39 | --with-http_secure_link_module \ 40 | --with-http_slice_module \ 41 | --with-http_ssl_module \ 42 | --with-http_stub_status_module \ 43 | --with-http_sub_module \ 44 | --with-http_v2_module \ 45 | --with-http_xslt_module=dynamic \ 46 | --with-ipv6 \ 47 | --with-mail \ 48 | --with-mail_ssl_module \ 49 | --with-md5-asm \ 50 | --with-pcre-jit \ 51 | --with-sha1-asm \ 52 | --with-stream \ 53 | --with-stream_ssl_module \ 54 | --with-threads \ 55 | --add-module=./ngx-fancyindex \ 56 | " 57 | 58 | RUN cd /tmp \ 59 | && curl -fSL https://openresty.org/download/openresty-${RESTY_VERSION}.tar.gz -o openresty-${RESTY_VERSION}.tar.gz \ 60 | && tar xzf openresty-${RESTY_VERSION}.tar.gz \ 61 | && cd /tmp/openresty-${RESTY_VERSION} \ 62 | && git clone https://github.com/aperezdc/ngx-fancyindex \ 63 | && eval ./configure -j${RESTY_J} 
${RESTY_CONFIG_OPTIONS} \ 64 | && make -j${RESTY_J} \ 65 | && make -j${RESTY_J} install \ 66 | && cd /tmp \ 67 | && rm -rf \ 68 | openresty-${RESTY_VERSION}.tar.gz openresty-${RESTY_VERSION} \ 69 | && mkdir -p /var/run/openresty 70 | 71 | RUN ln -sf /dev/stdout /usr/local/openresty/nginx/logs/access.log 72 | RUN ln -sf /dev/stderr /usr/local/openresty/nginx/logs/error.log 73 | 74 | # Add additional binaries into PATH for convenience 75 | ENV PATH=$PATH:/usr/local/openresty/luajit/bin:/usr/local/openresty/nginx/sbin:/usr/local/openresty/bin 76 | 77 | RUN luarocks install lua-resty-http 78 | RUN luarocks install lua-cjson 79 | 80 | RUN apt-get clean \ 81 | && rm -rf /var/lib/apt/lists/* 82 | 83 | EXPOSE 8000 84 | 85 | ADD robots.txt /var/www/robots.txt 86 | 87 | CMD ["/usr/local/openresty/bin/openresty", "-c", "/mnt/nginx.conf"] 88 | 89 | # Use SIGQUIT instead of default SIGTERM to cleanly drain requests 90 | # See https://github.com/openresty/docker-openresty/blob/master/README.md#tips--pitfalls 91 | STOPSIGNAL SIGQUIT 92 | -------------------------------------------------------------------------------- /images/nbserve/README.md: -------------------------------------------------------------------------------- 1 | This container is used for managing the public-paws.wmcloud.org traffic. So public viewing of notebooks and files. 2 | 3 | It also redirects files to the renderer container to be built. Also for public viewing. 
4 | -------------------------------------------------------------------------------- /images/nbserve/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /images/paws-hub/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jupyterhub/k8s-hub:3.3.8 2 | ARG NB_USER=tools.paws 3 | ARG NB_UID=52771 4 | ARG HOME=/home/paws 5 | USER root 6 | RUN adduser --disabled-password \ 7 | --gecos "Default user" \ 8 | --uid ${NB_UID} \ 9 | --home ${HOME} \ 10 | --force-badname \ 11 | ${NB_USER} 12 | 13 | COPY PAWS.svg /srv/jupyterhub 14 | COPY paws-favicon.ico /usr/local/share/jupyterhub/static/favicon.ico 15 | 16 | RUN chown -R ${NB_USER}:${NB_USER} /srv/jupyterhub 17 | USER ${NB_USER} 18 | 19 | CMD ["jupyterhub", "--config", "/srv/jupyterhub_config.py"] 20 | -------------------------------------------------------------------------------- /images/paws-hub/PAWS.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 9 | 10 | 11 | 12 | 13 | 14 | 16 | 28 | 29 | 32 | 33 | 34 | 35 | 36 | 39 | 41 | 43 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /images/paws-hub/README.md: -------------------------------------------------------------------------------- 1 | This is the jupyterhub image with a few branding changes for PAWS. 2 | 3 | The image is built for production by a github action. It can be built locally with: 4 | docker build . 5 | from this directory. 6 | 7 | The image itself would be deployed into PAWS through helm. Defined in the values.yaml file under the jupyterhub.hub definition. 
8 | -------------------------------------------------------------------------------- /images/paws-hub/paws-favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/images/paws-hub/paws-favicon.ico -------------------------------------------------------------------------------- /images/renderer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | python3-pip \ 6 | python3-setuptools \ 7 | python3-wheel \ 8 | uwsgi \ 9 | uwsgi-plugin-python3 \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | COPY requirements.txt /tmp/requirements.txt 14 | 15 | # We don't care about breaking system packages since this is a container 16 | RUN pip3 --no-cache-dir install --break-system-packages -r /tmp/requirements.txt 17 | 18 | # something about the --no-cache-dir keeps pyrsistent from visibly installing 19 | RUN pip3 install --break-system-packages pyrsistent 20 | 21 | COPY renderer.py /srv/renderer.py 22 | COPY basic.tpl /srv/basic.tpl 23 | COPY full.tpl /srv/full.tpl 24 | 25 | WORKDIR /srv 26 | 27 | CMD /usr/bin/uwsgi \ 28 | --plugins python3 \ 29 | --socket 0.0.0.0:8000 \ 30 | --wsgi-file /srv/renderer.py \ 31 | --master \ 32 | --processes 4 \ 33 | --die-on-term 34 | -------------------------------------------------------------------------------- /images/renderer/README.md: -------------------------------------------------------------------------------- 1 | This container manages the rendering of several file types. 
rst, md, and ipynb 2 | 3 | The files are sent to this container from the nbserve container here: 4 | https://github.com/toolforge/paws/blob/41f03a544041318f1fad479b32ae46ac9e816a55/images/nbserve/nginx.py#L101 5 | -------------------------------------------------------------------------------- /images/renderer/basic.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'display_priority.tpl' -%} 2 | 3 | 4 | {% block codecell %} 5 |
6 | {{ super() }} 7 |
8 | {%- endblock codecell %} 9 | 10 | {% block input_group -%} 11 |
12 | {{ super() }} 13 |
14 | {% endblock input_group %} 15 | 16 | {% block output_group %} 17 |
18 |
19 | {{ super() }} 20 |
21 |
22 | {% endblock output_group %} 23 | 24 | {% block in_prompt -%} 25 | {%- endblock in_prompt %} 26 | 27 | {% block empty_in_prompt -%} 28 | {%- endblock empty_in_prompt %} 29 | 30 | {# 31 | output_prompt doesn't do anything in HTML, 32 | because there is a prompt div in each output area (see output block) 33 | #} 34 | {% block output_prompt %} 35 | {% endblock output_prompt %} 36 | 37 | {% block input %} 38 |
39 |
40 | {{ cell.source | highlight_code(metadata=cell.metadata) }} 41 |
42 |
43 | {%- endblock input %} 44 | 45 | {% block output %} 46 |
47 | {{ super() }} 48 |
49 | {% endblock output %} 50 | 51 | {% block markdowncell scoped %} 52 |
53 | {{ self.empty_in_prompt() }} 54 |
55 |
56 | {{ cell.source | markdown2html | strip_files_prefix }} 57 |
58 |
59 |
60 | {%- endblock markdowncell %} 61 | 62 | {% block unknowncell scoped %} 63 | unknown type {{ cell.type }} 64 | {% endblock unknowncell %} 65 | 66 | {% block execute_result -%} 67 | {%- set extra_class="output_execute_result" -%} 68 | {% block data_priority scoped %} 69 | {{ super() }} 70 | {% endblock %} 71 | {%- set extra_class="" -%} 72 | {%- endblock execute_result %} 73 | 74 | {% block stream_stdout -%} 75 |
76 |
 77 | {{- output.text | ansi2html -}}
 78 | 
79 |
80 | {%- endblock stream_stdout %} 81 | 82 | {% block stream_stderr -%} 83 |
84 |
 85 | {{- output.text | ansi2html -}}
 86 | 
87 |
88 | {%- endblock stream_stderr %} 89 | 90 | {% block data_svg scoped -%} 91 |
92 | {%- if output.svg_filename %} 93 | 98 | {%- endblock data_svg %} 99 | 100 | {% block data_html scoped -%} 101 |
102 | {{ output.data['text/html'] }} 103 |
104 | {%- endblock data_html %} 105 | 106 | {% block data_markdown scoped -%} 107 |
108 | {{ output.data['text/markdown'] | markdown2html }} 109 |
110 | {%- endblock data_markdown %} 111 | 112 | {% block data_png scoped %} 113 |
114 | {%- if 'image/png' in output.metadata.get('filenames', {}) %} 115 | 131 |
132 | {%- endblock data_png %} 133 | 134 | {% block data_jpg scoped %} 135 |
136 | {%- if 'image/jpeg' in output.metadata.get('filenames', {}) %} 137 | 153 |
154 | {%- endblock data_jpg %} 155 | 156 | {% block data_latex scoped %} 157 |
158 | {{ output.data['text/latex'] }} 159 |
160 | {%- endblock data_latex %} 161 | 162 | {% block error -%} 163 |
164 |
165 | {{- super() -}}
166 | 
167 |
168 | {%- endblock error %} 169 | 170 | {%- block traceback_line %} 171 | {{ line | ansi2html }} 172 | {%- endblock traceback_line %} 173 | 174 | {%- block data_text scoped %} 175 |
176 |
177 | {{- output.data['text/plain'] | ansi2html -}}
178 | 
179 |
180 | {%- endblock -%} 181 | 182 | {%- block data_javascript scoped %} 183 | {% set div_id = uuid4() %} 184 |
185 |
186 | 190 |
191 | {%- endblock -%} 192 | -------------------------------------------------------------------------------- /images/renderer/full.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'basic.tpl' -%} 2 | {% from 'mathjax.tpl' import mathjax %} 3 | 4 | 5 | {%- block header -%} 6 | 7 | 8 | 9 | {%- block html_head -%} 10 | 11 | {{resources['metadata']['name']}} 12 | 13 | 14 | 15 | 16 | {% for css in resources.inlining.css -%} 17 | 20 | {% endfor %} 21 | 22 | 53 | 54 | 55 | 56 | 57 | 58 | {{ mathjax('https://tools-static.wmflabs.org/cdnjs/ajax/libs/mathjax/2.6.1/MathJax.js?config=TeX-AMS_HTML') }} 59 | {%- endblock html_head -%} 60 | 61 | {%- endblock header -%} 62 | 63 | {% block body %} 64 | 65 |
66 |
67 | {{ super() }} 68 |
69 |
def get_extension(path, format):
    """
    Return the extension of path (without the leading dot), or "".

    Uses os.path.splitext rather than splitting the whole path on ".",
    which wrongly returned e.g. "b/c" for "a.b/c" (a dot in a directory
    name) and treated dotfiles like ".bashrc" as having an extension.

    The ``format`` parameter is unused but kept so the signature matches
    how application() calls it.
    """
    ext = os.path.splitext(path)[1]
    # splitext keeps the leading dot; drop it ("" stays "")
    return ext[1:] if ext else ""


def render_ipynb(full_path, format):
    """
    Render a given ipynb file as a standalone HTML page via nbconvert.
    """
    exporter = HTMLExporter()
    with open(full_path, encoding="utf-8") as file_handle:
        html, _ = exporter.from_file(file_handle)
    return Response(html, mimetype="text/html")


def render_md(full_path, format):
    """
    Render a given markdown file as HTML.
    """
    with open(full_path, encoding="utf-8") as file_handle:
        text = file_handle.read()
    html = markdown.markdown(text)
    return Response(html, mimetype="text/html")


def render_rst(full_path, format):
    """
    Render a given reStructuredText file as HTML.
    """
    with open(full_path, encoding="utf-8") as file_handle:
        text = file_handle.read()
    html = docutils.core.publish_string(source=text, writer_name="html")
    return Response(html, mimetype="text/html")


# Map of extensions to functions to call for handling them
handlers = {
    "rst": render_rst,
    "md": render_md,
    "ipynb": render_ipynb,
}
@Request.application
def application(request):
    """
    WSGI entry point: map the request path to a file under BASE_PATH and
    render it with the handler matching its extension (404 if missing,
    400 if no handler, 403 on a suspicious path).
    """
    # str.lstrip() strips a *character set*, not a prefix: the old
    # request.path.lstrip(URL_PREFIX) could eat leading characters of
    # the file name itself.  Remove the prefix explicitly instead.
    file_path = request.path
    if file_path.startswith(URL_PREFIX):
        file_path = file_path[len(URL_PREFIX):]
    # A leading "/" would make os.path.join discard BASE_PATH entirely.
    file_path = file_path.lstrip("/")

    # Protect against path traversal attacks, if they make it this far.
    # normpath collapses ".." components; without it, a path such as
    # BASE_PATH + "/../etc" still startswith(BASE_PATH) and would pass.
    base = os.path.normpath(BASE_PATH)
    full_path = os.path.normpath(os.path.join(base, file_path))
    if full_path != base and not full_path.startswith(base + os.sep):
        # DANGER!
        return Response("Suspicious url", status=403)

    format = request.args.get("format", None)
    if format == "raw":
        # Let nginx serve raw files
        accel_path = os.path.join("/accelredir/", file_path)
        return Response("", headers={"X-Accel-Redirect": accel_path})

    try:
        extension = get_extension(full_path, format)
        if extension and extension in handlers:
            return handlers[extension](full_path, format)
        else:
            return Response("No handlers for format %s" % extension, status=400)
    except FileNotFoundError:
        return Response("Not found", status=404)
    # NOTE: the old trailing "return Response(full_path)" was unreachable
    # (every branch above returns) and has been removed.


if __name__ == "__main__":
    from werkzeug.serving import run_simple

    run_simple("localhost", 4000, application)
update && \ 24 | apt-get install --yes \ 25 | python3-venv \ 26 | pip \ 27 | python3 28 | 29 | ENV LC_ALL en_US.UTF-8 30 | ENV LANG en_US.UTF-8 31 | ENV LANGUAGE en_US.UTF-8 32 | 33 | # Create venv directory, and let users install into it 34 | ENV VENV_DIR /srv/paws 35 | RUN install -d -o ${NB_USER} -g ${NB_USER} ${VENV_DIR} 36 | 37 | ENV PATH=/srv/paws/pwb:/srv/paws/bin:/srv/paws:$PATH 38 | 39 | USER ${NB_USER} 40 | RUN python3 -m venv /srv/paws 41 | RUN pip --no-cache-dir install -U pip setuptools wheel 42 | 43 | # Install base notebook packages 44 | RUN pip install --prefix=/srv/paws --no-cache-dir \ 45 | jupyterhub==4.1.1 \ 46 | jupyterlab==4.4.0 47 | 48 | ## End minimal setup ## 49 | 50 | USER root 51 | 52 | # Base building utilities that'll always be required, probably 53 | RUN apt-get update && \ 54 | apt-get install --yes \ 55 | git \ 56 | locales \ 57 | pkg-config \ 58 | build-essential \ 59 | gcc \ 60 | apt-transport-https 61 | 62 | RUN apt-get update --yes && \ 63 | apt-get install --yes \ 64 | python3-dev \ 65 | openjdk-11-jdk 66 | 67 | # Utilities 68 | RUN apt-get install --yes \ 69 | curl \ 70 | wget \ 71 | less \ 72 | dnsutils \ 73 | emacs \ 74 | links \ 75 | nano \ 76 | vim \ 77 | lsof \ 78 | mariadb-client \ 79 | unrar 80 | 81 | RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - 82 | RUN apt-get install -y nodejs 83 | 84 | # pyaudio 85 | RUN apt-get install --yes \ 86 | portaudio19-dev 87 | 88 | RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ 89 | locale-gen 90 | 91 | ## Install R ## 92 | # Use newer version of R 93 | # Binary packages from packagemanager.rstudio.com work against this. 94 | # Base R from Focal is only 3.6. 
95 | ADD r/cran.gpg /etc/apt/trusted.gpg.d/cran.gpg 96 | ADD r/cran.list /etc/apt/sources.list.d/cran.list 97 | 98 | # Install languages needed and their core dev packages 99 | RUN apt-get update --yes && \ 100 | apt-get install --yes \ 101 | r-recommended \ 102 | r-base-dev \ 103 | r-cran-littler \ 104 | git \ 105 | curl \ 106 | gdebi \ 107 | # For R's mysql 108 | libmariadb-dev \ 109 | # For R's curl 110 | libcurl4-openssl-dev \ 111 | # for ipython kernels 112 | libzmq3-dev \ 113 | # For R's devtools 114 | libssl-dev 115 | 116 | # Install rstudio-server 117 | ENV RSTUDIO_SERVER_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.12.0-467-amd64.deb 118 | RUN curl --silent --location --fail ${RSTUDIO_SERVER_URL} > /tmp/rstudio-server.deb 119 | RUN gdebi -n /tmp/rstudio-server.deb && rm /tmp/rstudio-server.deb 120 | 121 | 122 | # Create user owned R libs dir 123 | # This lets users temporarily install packages 124 | ENV R_LIBS_USER /srv/r 125 | RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} 126 | 127 | # R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. 128 | # We uncomment the default, and set what we wanna - so it picks up 129 | # the packages we install. Without this, RStudio doesn't see the packages 130 | # that R does. 
131 | # Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py 132 | RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ 133 | echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron 134 | 135 | USER ${NB_USER} 136 | RUN pip install --no-cache-dir \ 137 | jupyter-server-proxy \ 138 | jupyter-rsession-proxy 139 | 140 | # Set CRAN mirror to rspm before we install anything 141 | COPY r/Rprofile.site /usr/lib/R/etc/Rprofile.site 142 | # RStudio needs its own config 143 | COPY r/rsession.conf /etc/rstudio/rsession.conf 144 | 145 | USER root 146 | # Install the R Kernel 147 | RUN r -e "install.packages('IRkernel', version='1.3.2')" && \ 148 | r -e "IRkernel::installspec(prefix='${VENV_DIR}')" && \ 149 | rm -rf /tmp/downloaded_packages 150 | 151 | ## Done installing R 152 | 153 | USER root 154 | 155 | ## Setup OpenRefine 156 | ENV OPENREFINE_DIR /srv/openrefine 157 | ENV PATH=$PATH:$OPENREFINE_DIR 158 | RUN mkdir -p ${OPENREFINE_DIR} && cd ${OPENREFINE_DIR} && \ 159 | curl -L 'https://github.com/OpenRefine/OpenRefine/releases/download/3.9.0/openrefine-linux-3.9.0.tar.gz' | tar xzf - --strip=1 160 | 161 | USER root 162 | RUN apt-get install --yes unzip 163 | 164 | USER ${NB_USER} 165 | ENV REFINE_DIR /home/paws 166 | RUN pip install --no-cache-dir nb_serverproxy_openrefine 167 | 168 | USER root 169 | RUN wget https://github.com/OpenRefine/CommonsExtension/releases/download/v0.1.3/openrefine-commons-extension-0.1.3.zip -O /tmp/openrefine-commonsextension.zip 170 | RUN unzip /tmp/openrefine-commonsextension.zip -d ${OPENREFINE_DIR}/webapp/extensions/ 171 | 172 | ## Done setting up OpenRefine 173 | 174 | ## install quarto 175 | 176 | USER root 177 | RUN wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.4.554/quarto-1.4.554-linux-amd64.deb -O /tmp/quarto.deb 178 | RUN dpkg -i /tmp/quarto.deb 179 | 180 | ## done install quarto 181 | 182 | USER root 183 | 184 | RUN npm install -g 
wikibase-cli 185 | 186 | # Machine-learning type stuff 187 | RUN apt-get update && \ 188 | apt-get install --yes \ 189 | # For scipy & friends 190 | libblas-dev \ 191 | liblapack-dev \ 192 | libquadmath0 \ 193 | gfortran \ 194 | # for lxml 195 | libxml2-dev \ 196 | libxslt1-dev \ 197 | # for matplotlib 198 | libfreetype6-dev \ 199 | libpng-dev \ 200 | # for ipython kernels 201 | libzmq3-dev \ 202 | libreadline-dev \ 203 | # For PDFs and stuff 204 | pandoc \ 205 | texlive-xetex 206 | 207 | 208 | ## Install Julia 209 | # Install Julia directories and depot path 210 | 211 | ENV PATH=$PATH:/srv/julia/bin 212 | ENV JULIA_DEPOT_PATH /srv/julia-depot/ 213 | RUN install -d -o ${NB_USER} -g ${NB_USER} /srv/julia 214 | RUN install -d -o ${NB_USER} -g ${NB_USER} ${JULIA_DEPOT_PATH} 215 | 216 | USER ${NB_USER} 217 | 218 | # install julia and julia kernel 219 | COPY install-julia /tmp/install-julia 220 | RUN /tmp/install-julia 221 | 222 | ## Done Installing Julia 223 | 224 | # install sql access tool 225 | COPY install-sql-tool /tmp/install-sql-tool 226 | RUN /tmp/install-sql-tool 227 | 228 | ## Done installing sql access tool 229 | 230 | # Install the bash kernel 231 | RUN pip install bash_kernel 232 | RUN python -m bash_kernel.install --sys-prefix 233 | 234 | # Install mass amount of python libraries! 
235 | COPY --chown=tools.paws:tools.paws requirements.txt /tmp/requirements.txt 236 | 237 | RUN pip --no-cache-dir install -r /tmp/requirements.txt 238 | 239 | # Install pywikibot 240 | RUN git clone \ 241 | --branch $PYWIKIBOT_VERSION \ 242 | --depth 1 \ 243 | --recurse-submodules \ 244 | --shallow-submodules \ 245 | https://gerrit.wikimedia.org/r/pywikibot/core.git \ 246 | /srv/paws/pwb 247 | RUN pip install --no-cache-dir \ 248 | /srv/paws/pwb[eventstreams,graphviz,google,isbn,memento,mysql,mwoauth,html] 249 | COPY --chown=tools.paws:tools.paws user-config.py /srv/paws/ 250 | COPY --chown=tools.paws:tools.paws user-fixes.py /srv/paws/ 251 | 252 | 253 | COPY install-extensions /usr/local/bin/ 254 | RUN /usr/local/bin/install-extensions 255 | 256 | COPY banner /etc/bash.bashrc 257 | 258 | # use custom css to hide clusters tab 259 | COPY --chown=tools.paws:tools.paws hide_clusters_tab.css /home/paws/.jupyter/custom/custom.css 260 | 261 | EXPOSE 8888 262 | -------------------------------------------------------------------------------- /images/singleuser/banner: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Make history more useful and permanent! 4 | # Stolen from http://mywiki.wooledge.org/BashFAQ/088 5 | shopt -s histappend 6 | HISTFILESIZE=400000000 7 | HISTSIZE=10000 8 | PROMPT_COMMAND="history -a" 9 | 10 | # PS1 11 | PS1='${JPY_USER}@PAWS:\w$ ' 12 | 13 | echo "Welcome to PAWS!" 14 | echo "Please behave responsibly" 15 | echo "Getting Started: https://www.mediawiki.org/wiki/Manual:Pywikibot/PAWS" 16 | echo "Questions? Need help? Find us on #wikimedia-cloud on IRC on libera.chat!" 
17 | echo "File bugs at https://phabricator.wikimedia.org/maniphest/task/create/?projects=PAWS" 18 | -------------------------------------------------------------------------------- /images/singleuser/hide_clusters_tab.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Placeholder for custom user CSS 3 | * mainly to be overriden in profile/static/custom/custom.css 4 | * This will always be an empty file in IPython 5 | */ 6 | 7 | /* Hide clusters tab */ 8 | .nav > li > .clusters_tab_link { 9 | display: none; 10 | } 11 | -------------------------------------------------------------------------------- /images/singleuser/install-extensions: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -euo pipefail 3 | # we're getting closer to the end of life for setup.py pipefail now fails on it. 4 | # To keep labpawspublic working we should update to use wheel 5 | 6 | pip install --no-cache-dir \ 7 | pythreejs \ 8 | ipyleaflet \ 9 | bqplot \ 10 | RISE \ 11 | py-heat-magic \ 12 | jupyter-resource-usage \ 13 | git+https://github.com/toolforge/labpawspublic@10ba684789ff5b6e0e2e2122ce68be101266acb8 \ 14 | # git+https://github.com/toolforge/paws-favicon@v1.0 \ 15 | 16 | # Rebuild JupyterLab for plotly-dash extension 17 | # Should be fixed by https://github.com/plotly/jupyter-dash/issues/49 18 | jupyter lab build 19 | 20 | pip install --no-cache-dir git+https://github.com/toolforge/ipynb-paws@147bbeb4dda35e 21 | -------------------------------------------------------------------------------- /images/singleuser/install-julia: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # from: https://github.com/berkeley-dsep-infra/datahub/blob/staging/deployments/julia/image/install-julia.bash 4 | set -euo pipefail 5 | 6 | export JULIA_PATH=/srv/julia/ 7 | export JUPYTER_DATA_DIR=${VENV_DIR}/share/jupyter/ 8 | 9 | curl --silent 
--location --fail https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-1.10.2-linux-x86_64.tar.gz | tar xvz -C ${JULIA_PATH} --strip-components=1 10 | julia -e 'using Pkg; Pkg.add("IJulia"); Pkg.build("IJulia"); using IJulia; installkernel("Julia");' 11 | -------------------------------------------------------------------------------- /images/singleuser/install-sql-tool: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl https://gerrit.wikimedia.org/r/plugins/gitiles/labs/toollabs/+/refs/heads/master/misctools/sql?format=TEXT | base64 -d > /srv/paws/bin/sql 4 | chmod 755 /srv/paws/bin/sql 5 | -------------------------------------------------------------------------------- /images/singleuser/r/Rprofile.site: -------------------------------------------------------------------------------- 1 | # Use RStudio's CRAN mirror to get binary packages 2 | # 'latest' just means it has all available versions. 3 | # We can specify version numbers in devtools::install_version 4 | options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/noble/latest")) 5 | 6 | # RStudio's CRAN mirror needs this to figure out which binary package to serve. 7 | # If not set properly, it will just serve up source packages 8 | # Quite hilarious, IMO. 
9 | # See https://docs.rstudio.com/rspm/admin/binaries.html 10 | options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os))) 11 | -------------------------------------------------------------------------------- /images/singleuser/r/cran.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/images/singleuser/r/cran.gpg -------------------------------------------------------------------------------- /images/singleuser/r/cran.list: -------------------------------------------------------------------------------- 1 | deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/cran.gpg] https://cloud.r-project.org/bin/linux/ubuntu noble-cran40/ 2 | -------------------------------------------------------------------------------- /images/singleuser/r/rsession.conf: -------------------------------------------------------------------------------- 1 | # Use binary packages! 2 | r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/noble/latest 3 | -------------------------------------------------------------------------------- /images/singleuser/requirements.txt: -------------------------------------------------------------------------------- 1 | # Mediawiki related stuff 2 | mwapi 3 | mwdb 4 | mwxml 5 | mwreverts 6 | mwsessions 7 | mwdiffs 8 | mwoauth 9 | mwtypes 10 | mwpersistence 11 | mwparserfromhell 12 | git+https://github.com/yuvipanda/python-wdqs.git 13 | 14 | # visualization libraries 15 | ipywidgets 16 | bokeh 17 | matplotlib 18 | seaborn 19 | ipyleaflet 20 | pythreejs 21 | bqplot 22 | plotly 23 | holoviews[recommended] 24 | streamlit 25 | 26 | # scientific stuff 27 | pandas 28 | scipy 29 | scikit-learn 30 | 31 | # Scraping 32 | lxml 33 | beautifulsoup4 34 | 35 | # SQL! 
36 | pymysql 37 | mycli 38 | wmpaws 39 | 40 | # Web dev stuff 41 | flask 42 | fastapi 43 | 44 | # mwpersistence has a dep which pulls in PyYAML 5.4.1, which has 45 | # packaging issues with Cython 3.0.0 (also present in PyYAML 6.0.0). 46 | # Pin a higher version. 47 | pyyaml>6.0.0 48 | -------------------------------------------------------------------------------- /images/singleuser/user-config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | mylang = "test" 4 | family = "wikipedia" 5 | 6 | custom_path = os.path.expanduser("~/user-config.py") 7 | if os.path.exists(custom_path): 8 | with open(custom_path, "rb") as f: 9 | exec(compile(f.read(), custom_path, "exec"), globals()) 10 | 11 | del f 12 | # Clean up temp variables, since pwb issues a warning otherwise 13 | # to help people catch misspelt config 14 | del custom_path 15 | 16 | # Things that should be non-easily-overridable 17 | for fam in ( 18 | "wikipedia", 19 | "commons", 20 | "meta", 21 | "wikibooks", 22 | "wikimedia", 23 | "wikiquote", 24 | "wikisource", 25 | "wikisource", 26 | "wiktionary", 27 | "wikiversity", 28 | "wikidata", 29 | "mediawiki", 30 | "wikivoyage", 31 | "wikinews", 32 | "species", 33 | "wikifunctions", 34 | "wikitech", 35 | ): 36 | usernames[fam]["*"] = os.environ["USER"] # noqa: F821 37 | 38 | del fam 39 | 40 | # If OAuth integration is available, take it 41 | if "CLIENT_ID" in os.environ: 42 | authenticate["*"] = ( # noqa: F821 43 | os.environ["CLIENT_ID"], 44 | os.environ["CLIENT_SECRET"], 45 | os.environ["ACCESS_KEY"], 46 | os.environ["ACCESS_SECRET"], 47 | ) 48 | -------------------------------------------------------------------------------- /images/singleuser/user-fixes.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | custom_path = os.path.expanduser("~/user-fixes.py") 4 | if os.path.exists(custom_path): 5 | with open(custom_path, "rb") as f: 6 | exec(compile(f.read(), 
custom_path, "exec"), globals()) 7 | 8 | del f 9 | 10 | # Clean up temp variables, since pwb issues a warning otherwise 11 | # to help people catch misspelt config 12 | del custom_path 13 | -------------------------------------------------------------------------------- /paws/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /paws/Chart.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: v2 4 | description: A Helm chart for PAWS 5 | name: paws 6 | version: 3.0.0 7 | dependencies: 8 | - name: jupyterhub 9 | version: 3.3.8 10 | repository: "https://jupyterhub.github.io/helm-chart" 11 | -------------------------------------------------------------------------------- /paws/codfw-secrets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/codfw-secrets.yaml -------------------------------------------------------------------------------- /paws/codfw.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | mysql: 4 | domain: analytics.db.svc.eqiad.wmflabs 5 | # TODO: remove this when the multiinstance replica proxy is removed 6 | host: enwiki.analytics.db.svc.eqiad.wmflabs 7 | jupyterhub: 8 | hub: 9 | extraVolumes: 10 | - name: homes 11 | nfs: 12 | server: 
pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 13 | path: /srv/paws/project 14 | - name: dumps 15 | nfs: 16 | server: pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 17 | path: / 18 | # Without this, dumps becomes inaccessible and can hang the host 19 | - name: dumps-src1 20 | nfs: 21 | server: pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 22 | path: / 23 | - name: dumps-src2 24 | nfs: 25 | server: pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud 26 | path: / 27 | extraConfig: 28 | 00-myConfig: | 29 | localdev = False 30 | nfs_home = 'pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud' 31 | dumps_src1 = 'pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud' 32 | dumps_src2 = 'pawsdev-nfs.pawsdev.codfw1dev.wikimedia.cloud' 33 | ingress: 34 | enabled: true 35 | hosts: 36 | - hub-paws-dev.codfw1dev.wmcloud.org 37 | singleuser: 38 | extraEnv: 39 | HUB_DOMAIN: "hub-paws-dev.codfw1dev.wmcloud.org" # Check jupyterhub.ingress.hosts 40 | minesweeper: 41 | enabled: true 42 | localdev: 43 | enabled: false 44 | pawspublic: 45 | nbserve: 46 | requests: 47 | memory: "200Mi" 48 | ingress: 49 | host: public-paws-dev.codfw1dev.wmcloud.org 50 | renderer: 51 | requests: 52 | cpu: "50m" 53 | memory: "10Mi" 54 | -------------------------------------------------------------------------------- /paws/files/minesweeper/minesweeper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | minesweeper script 4 | 5 | Continuous process, on each node via DaemonSet, 6 | to identify processes that could be considered for termination: 7 | 8 | - determine which processes are "suspicious" (see herorat.py) 9 | - produce report on suspicious pods: 10 | - show running processes (`ps aux`) 11 | - tail pod logs 12 | - automatically terminate pods likely to be abuse, etc. 
13 | 14 | Downloaded originally from https://raw.githubusercontent.com/jupyterhub/mybinder.org-deploy/6364f6e2ad3fefd8fa00937749f456528489ec76/mybinder/files/minesweeper/minesweeper.py 15 | 16 | """ 17 | 18 | import asyncio 19 | import copy 20 | import glob 21 | import json 22 | import os 23 | import pprint 24 | import re 25 | import socket 26 | import sys 27 | import threading 28 | from concurrent.futures import ThreadPoolExecutor 29 | from functools import partial 30 | from operator import attrgetter 31 | from textwrap import indent 32 | 33 | import kubernetes.client 34 | import kubernetes.config 35 | from kubernetes.stream import stream 36 | 37 | import psutil 38 | 39 | # herorat located in secrets/minesweeper/ 40 | import herorat 41 | from herorat import inspect_pod 42 | from herorat import inspect_process 43 | 44 | 45 | kubernetes.config.load_incluster_config() 46 | kube = kubernetes.client.CoreV1Api() 47 | local = threading.local() 48 | config = {} 49 | hostname = os.environ.get("NODE_NAME", socket.gethostname()) 50 | 51 | default_config = { 52 | "userid": 1000, 53 | "inspect_procs_without_pod": False, 54 | "threads": 8, 55 | "interval": 300, 56 | "namespace": os.environ.get("NAMESPACE", "default"), 57 | "pod_selectors": { 58 | "label_selector": "component=singleuser-server", 59 | "field_selector": f"spec.nodeName={hostname}", 60 | }, 61 | "log_tail_lines": 100, 62 | # process attributes to retrieve 63 | # see psutil.as_dict docs for available fields: 64 | # https://psutil.readthedocs.io/en/latest/#psutil.Process.as_dict 65 | "proc_attrs": [ 66 | "cmdline", 67 | "cpu_percent", 68 | "cpu_times", 69 | "exe", 70 | "memory_info", 71 | "name", 72 | "pid", 73 | "ppid", 74 | "status", 75 | "uids", 76 | ], 77 | } 78 | 79 | default_config.update(herorat.default_config) 80 | 81 | 82 | def get_kube(): 83 | """Get thread-local kubernetes client 84 | 85 | kubernetes client objects aren't threadsafe, I guess 86 | """ 87 | if not hasattr(local, "kube"): 88 | local.kube = 
kubernetes.client.CoreV1Api() 89 | return local.kube 90 | 91 | 92 | class Proc(dict): 93 | """Proc is a dict subclass with attribute-access for keys 94 | 95 | suspicious and should_terminate are added via inspection. 96 | They can be booleans or truthy strings explaining 97 | why they are suspicious or should be terminated. 98 | """ 99 | 100 | def __init__(self, **kwargs): 101 | kwargs.setdefault("suspicious", False) 102 | kwargs.setdefault("should_terminate", False) 103 | super().__init__(**kwargs) 104 | 105 | # secondary derived fields 106 | # cmd is the command-line as a single string 107 | self["cmd"] = " ".join(self["cmdline"]) 108 | # cpu_total is the sum of cpu times (user, system, children, etc.) 109 | self["cpu_total"] = sum(kwargs.get("cpu_times", [])) 110 | 111 | def __repr__(self): 112 | key_fields = ", ".join( 113 | [ 114 | f"{key}={self.get(key)}" 115 | for key in [ 116 | "pid", 117 | "status", 118 | "suspicious", 119 | "should_terminate", 120 | "cmd", 121 | ] 122 | if self.get(key) is not None 123 | ] 124 | ) 125 | return f"{self.__class__.__name__}({key_fields})" 126 | 127 | def __getattr__(self, key): 128 | return self[key] 129 | 130 | def __setattr__(self, key, value): 131 | self[key] = value 132 | 133 | 134 | def get_procs(userid): 135 | """Get all container processes running with a given user id""" 136 | procs = [] 137 | for p in psutil.process_iter(attrs=config["proc_attrs"]): 138 | # TODO: should we filter to userid? 139 | # For now: skip userid filtering, because we 140 | # want to catch all processes in pods, even if they 141 | # ran setuid 142 | # if p.info["uids"].real != userid: 143 | # continue 144 | if not p.info["cmdline"]: 145 | # ignore empty commands, e.g. 
kernel processes 146 | continue 147 | 148 | proc = Proc(**p.info) 149 | procs.append(proc) 150 | 151 | procs = sorted(procs, key=attrgetter("cpu_percent"), reverse=True) 152 | return procs 153 | 154 | 155 | def get_pods(): 156 | """Get all the pods in our namespace""" 157 | kube = get_kube() 158 | namespace = config["namespace"] 159 | # _preload_content=False doesn't even json-parse list results?? 160 | resp = kube.list_namespaced_pod( 161 | namespace, 162 | _preload_content=False, 163 | **config["pod_selectors"], 164 | ) 165 | return json.loads(resp.read().decode("utf8"))["items"] 166 | 167 | 168 | def pods_by_uid(pods): 169 | """Construct a dict of pods, keyed by pod uid""" 170 | return {pod["metadata"]["uid"]: pod for pod in pods} 171 | 172 | 173 | def get_all_pod_uids(): 174 | """Return mapping of pid to pod uid""" 175 | 176 | pod_uids = {} 177 | for cgroup_file in glob.glob("/proc/[0-9]*/cgroup"): 178 | pid = int(cgroup_file.split("/")[-2]) 179 | 180 | try: 181 | with open(cgroup_file) as f: 182 | cgroups = f.read() 183 | 184 | except FileNotFoundError: 185 | # process deleted, ignore 186 | continue 187 | 188 | m = re.search("/pod([^/]+)", cgroups) 189 | if m is None: 190 | # not a pod proc 191 | continue 192 | pod_uids[pid] = m.group(1) 193 | return pod_uids 194 | 195 | 196 | def associate_pods_procs(pods, procs): 197 | """Associate pods and processes 198 | For all pods, defines pod["minesweeper"]["procs"] = list_of_procs_in_pod 199 | 200 | Returns (pods, procs_without_pods) 201 | """ 202 | for pod in pods.values(): 203 | pod["minesweeper"] = { 204 | "procs": [], 205 | } 206 | procs_without_pods = [] 207 | pod_uids = get_all_pod_uids() 208 | for proc in procs: 209 | pod_uid = pod_uids.get(proc.pid) 210 | pod = pods.get(pod_uid) 211 | if not pod: 212 | procs_without_pods.append(proc) 213 | else: 214 | pod["minesweeper"]["procs"].append(proc) 215 | 216 | return pods, procs_without_pods 217 | 218 | 219 | def ps_pod(pod, userid=1000): 220 | """Get ps output from 
a single pod""" 221 | kube = get_kube() 222 | try: 223 | client = stream( 224 | kube.connect_get_namespaced_pod_exec, 225 | pod["metadata"]["name"], 226 | namespace=pod["metadata"]["namespace"], 227 | command=["ps", "aux"], 228 | stderr=True, 229 | stdin=False, 230 | stdout=True, 231 | _preload_content=False, 232 | ) 233 | client.run_forever(timeout=60) 234 | stderr = client.read_stderr() 235 | if stderr.strip(): 236 | print(f"err! {stderr}", file=sys.stderr) 237 | stdout = client.read_stdout() 238 | 239 | returncode = client.returncode 240 | if returncode: 241 | raise RuntimeError(f"stdout={stdout}\nstderr={stderr}") 242 | return stdout 243 | except Exception as e: 244 | return f"Error reporting on ps in {pod['metadata']['name']}: {e}" 245 | 246 | 247 | def log_pod(pod): 248 | """Return the logs for a suspicious pod""" 249 | kube = get_kube() 250 | try: 251 | return kube.read_namespaced_pod_log( 252 | pod["metadata"]["name"], 253 | namespace=pod["metadata"]["namespace"], 254 | tail_lines=config["log_tail_lines"], 255 | ) 256 | except Exception as e: 257 | return f"Error collecting logs for {pod['metadata']['name']}: {e}" 258 | 259 | 260 | async def report_pod(pod): 261 | """Produce a report on a single pod""" 262 | pod_name = pod["metadata"]["name"] 263 | ps_future = in_pool(lambda: ps_pod(pod)) 264 | logs_future = in_pool(lambda: log_pod(pod)) 265 | ps, logs = await asyncio.gather(ps_future, logs_future) 266 | print( 267 | "\n".join( 268 | [ 269 | pod_name, 270 | f"ps {pod_name}:", 271 | indent(ps, " "), 272 | f"logs {pod_name}:", 273 | indent(logs, " "), 274 | ] 275 | ) 276 | ) 277 | 278 | 279 | def terminate_pod(pod): 280 | """Call in a thread to terminate a pod""" 281 | namespace = pod["metadata"]["namespace"] 282 | name = pod["metadata"]["name"] 283 | print(f"Deleting pod {name}") 284 | kube = get_kube() 285 | kube.delete_namespaced_pod(name=name, namespace=namespace) 286 | 287 | 288 | async def node_report(pods=None, userid=1000): 289 | """Print a report of 
suspicious processes on a single node""" 290 | if pods is None: 291 | pods = pods_by_uid(await in_pool(get_pods)) 292 | procs = await in_pool(lambda: get_procs(userid)) 293 | print(f"Total processes for {hostname}: {len(procs)}\n", end="") 294 | pods, procs_without_pod = associate_pods_procs(pods, procs) 295 | 296 | # inspect all procs in our pods 297 | user_procs = [] 298 | for pod in pods.values(): 299 | user_procs.extend(pod["minesweeper"]["procs"]) 300 | pod["minesweeper"]["procs"] = [ 301 | inspect_process(p) for p in pod["minesweeper"]["procs"] 302 | ] 303 | print(f"Total user pods for {hostname}: {len(pods)}\n", end="") 304 | print(f"Total user processes for {hostname}: {len(user_procs)}\n", end="") 305 | suspicious_pods = [pod for pod in pods.values() if inspect_pod(pod)["suspicious"]] 306 | 307 | print(f"Pods of interest for {hostname}: {len(suspicious_pods)}") 308 | 309 | # report on all suspicious pods 310 | report_futures = [] 311 | for pod in suspicious_pods: 312 | fut = asyncio.ensure_future(report_pod(pod)) 313 | report_futures.append(fut) 314 | await asyncio.sleep(0) 315 | 316 | # report on suspicious processes with no matching pod 317 | suspicious_procs_without_pod = [] 318 | if config["inspect_procs_without_pod"]: 319 | procs_without_pod = [inspect_process(p) for p in procs_without_pod] 320 | suspicious_procs_without_pod = [p for p in procs_without_pod if p.suspicious] 321 | 322 | if suspicious_procs_without_pod: 323 | print( 324 | f"No pods found for {len(suspicious_procs_without_pod)} suspicious processes on {hostname}:" 325 | ) 326 | for proc in suspicious_procs_without_pod: 327 | print(f" {proc.pid}: {proc.cmd}") 328 | 329 | if report_futures: 330 | await asyncio.gather(*report_futures) 331 | 332 | # finally, terminate pods that meet the immediate termination condition 333 | pods_to_terminate = [ 334 | pod for pod in suspicious_pods if pod["minesweeper"]["should_terminate"] 335 | ] 336 | if pods_to_terminate: 337 | terminate_futures = [ 338 | 
in_pool(partial(terminate_pod, pod)) for pod in pods_to_terminate 339 | ] 340 | await asyncio.gather(*terminate_futures) 341 | 342 | 343 | def get_pool(n=None): 344 | """Get the global thread pool executor""" 345 | if get_pool._pool is None: 346 | get_pool._pool = ThreadPoolExecutor(config["threads"]) 347 | return get_pool._pool 348 | 349 | 350 | get_pool._pool = None 351 | 352 | 353 | def in_pool(func): 354 | f = get_pool().submit(func) 355 | return asyncio.wrap_future(f) 356 | 357 | 358 | def load_config(): 359 | """load config from mounted config map 360 | 361 | may change during run, so reload from file each time 362 | """ 363 | global config 364 | prior_config = copy.deepcopy(config) 365 | config.update(default_config) 366 | config_file = "/etc/minesweeper/minesweeper.json" 367 | if os.path.isfile(config_file): 368 | with open(config_file) as f: 369 | file_config = json.load(f) 370 | config.update(file_config) 371 | # sync global config with herorat 372 | herorat.config = config 373 | else: 374 | print(f"No such file: {config_file}") 375 | 376 | if config != prior_config: 377 | print("Loaded config:") 378 | pprint.pprint(config) 379 | 380 | return config 381 | 382 | 383 | async def main(): 384 | """Main entrypoint: run node_report periodically forever""" 385 | while True: 386 | # reload since configmap can change 387 | load_config() 388 | await node_report(userid=config["userid"]) 389 | await asyncio.sleep(config["interval"]) 390 | 391 | 392 | if __name__ == "__main__": 393 | asyncio.run(main()) 394 | -------------------------------------------------------------------------------- /paws/files/minesweeper/secrets/ban.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/files/minesweeper/secrets/ban.py -------------------------------------------------------------------------------- /paws/files/minesweeper/secrets/herorat.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/files/minesweeper/secrets/herorat.py -------------------------------------------------------------------------------- /paws/files/minesweeper/secrets/minesweeper.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/files/minesweeper/secrets/minesweeper.yaml -------------------------------------------------------------------------------- /paws/production.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | mysql: 4 | domain: analytics.db.svc.eqiad.wmflabs 5 | # TODO: remove this when the multiinstance replica proxy is removed 6 | host: enwiki.analytics.db.svc.eqiad.wmflabs 7 | jupyterhub: 8 | hub: 9 | extraVolumes: 10 | - name: homes 11 | nfs: 12 | server: paws-nfs.svc.paws.eqiad1.wikimedia.cloud 13 | path: /srv/paws/project 14 | - name: dumps 15 | nfs: 16 | server: clouddumps1002.wikimedia.org 17 | path: / 18 | # Without this, dumps becomes inaccessible and can hang the host 19 | - name: dumps-src1 20 | nfs: 21 | server: clouddumps1001.wikimedia.org 22 | path: / 23 | - name: dumps-src2 24 | nfs: 25 | server: clouddumps1002.wikimedia.org 26 | path: / 27 | extraConfig: 28 | 00-myConfig: | 29 | localdev = False 30 | nfs_home = 'paws-nfs.svc.paws.eqiad1.wikimedia.cloud' 31 | dumps_src1 = 'clouddumps1001.wikimedia.org' 32 | dumps_src2 = 'clouddumps1002.wikimedia.org' 33 | ingress: 34 | enabled: true 35 | hosts: 36 | - hub-paws.wmcloud.org 37 | singleuser: 38 | extraEnv: 39 | HUB_DOMAIN: "hub-paws.wmcloud.org" # Check jupyterhub.ingress.hosts 40 | minesweeper: 41 | enabled: true 42 | localdev: 43 | enabled: false 44 | pawspublic: 45 | nbserve: 46 | requests: 47 | memory: "2000Mi" 48 | ingress: 49 | host: 
public-paws.wmcloud.org 50 | renderer: 51 | requests: 52 | cpu: "500m" 53 | memory: "1000Mi" 54 | -------------------------------------------------------------------------------- /paws/secrets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/paws/secrets.yaml -------------------------------------------------------------------------------- /paws/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Thank you for installing {{ .Chart.Name }}! 2 | 3 | An essential part of setting up your dev environment is adding a hosts file entry. 4 | Get the IP of minikube with: 5 | minikube ip 6 | Add these lines to your hosts file: 7 | {{ index .Values.jupyterhub.ingress.hosts 0 }} 8 | {{ .Values.pawspublic.ingress.host }} 9 | 10 | You can override any values you need in a file called dev-values.yaml. 11 | 12 | If you create one, run: 13 | helm -n {{ .Release.Namespace }} upgrade {{ .Release.Name }} paws/ -f dev-values.yaml 14 | Happy hacking. 
15 | -------------------------------------------------------------------------------- /paws/templates/frontpage.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{ if .Values.paws.frontPageEnabled }} 4 | apiVersion: networking.k8s.io/v1 5 | kind: Ingress 6 | metadata: 7 | labels: 8 | name: paws-front-page 9 | ingress.paws.wmcloud.org: front 10 | annotations: 11 | nginx.ingress.kubernetes.io/permanent-redirect: {{ .Values.paws.ingress.frontRedirect | quote }} 12 | name: paws-front-page 13 | spec: 14 | ingressClassName: nginx 15 | rules: 16 | - host: {{ .Values.paws.ingress.frontHost | quote }} 17 | {{ end }} 18 | -------------------------------------------------------------------------------- /paws/templates/localdev.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.localdev.enabled }} 2 | # We assume this is only enabled in a local dev environment, so... 3 | --- 4 | apiVersion: v1 5 | kind: PersistentVolume 6 | metadata: 7 | name: userhomes 8 | spec: 9 | accessModes: 10 | - ReadWriteMany 11 | capacity: 12 | storage: 1Gi 13 | hostPath: 14 | path: /srv/paws/project/paws/userhomes 15 | --- 16 | apiVersion: v1 17 | kind: PersistentVolume 18 | metadata: 19 | name: dumps1 20 | spec: 21 | accessModes: 22 | - ReadOnlyMany 23 | capacity: 24 | storage: 1Gi 25 | hostPath: 26 | path: /mnt/nfs/dumps-clouddumps1001.wikimedia.org 27 | --- 28 | kind: PersistentVolume 29 | apiVersion: v1 30 | metadata: 31 | name: dumps2 32 | spec: 33 | accessModes: 34 | - ReadOnlyMany 35 | capacity: 36 | storage: 1Gi 37 | hostPath: 38 | path: /mnt/nfs/dumps-clouddumps1002.wikimedia.org 39 | --- 40 | kind: PersistentVolume 41 | apiVersion: v1 42 | metadata: 43 | name: dumps 44 | spec: 45 | accessModes: 46 | - ReadOnlyMany 47 | capacity: 48 | storage: 1Gi 49 | hostPath: 50 | path: /mnt/public/dumps 51 | --- 52 | apiVersion: batch/v1 53 | kind: Job 54 | metadata: 55 | name: 
scary-host-vol-perm-fix 56 | labels: 57 | app.kubernetes.io/managed-by: {{ .Release.Service | quote }} 58 | app.kubernetes.io/instance: {{ .Release.Name | quote }} 59 | annotations: 60 | "helm.sh/hook": post-install 61 | "helm.sh/hook-weight": "-1" 62 | "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded 63 | spec: 64 | template: 65 | metadata: 66 | name: {{ printf "permission-fix-%s" .Release.Name | quote }} 67 | labels: 68 | app.kubernetes.io/managed-by: {{ .Release.Service | quote }} 69 | app.kubernetes.io/instance: {{ .Release.Name | quote }} 70 | helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 71 | spec: 72 | restartPolicy: Never 73 | containers: 74 | - name: fixingperms-host-vols 75 | image: {{ tpl .Values.localdev.image.template . | quote }} 76 | command: 77 | - chown 78 | - -R 79 | - 52771:52771 80 | - /data/project/paws/userhomes 81 | volumeMounts: 82 | - mountPath: /data/project/paws/userhomes 83 | name: security-disaster-only-for-minikube 84 | volumes: 85 | - name: security-disaster-only-for-minikube 86 | hostPath: 87 | path: /srv/paws/project/paws/userhomes 88 | {{ end }} 89 | -------------------------------------------------------------------------------- /paws/templates/minesweeper/configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{- /* configmap for minesweeper source files */}} 4 | kind: ConfigMap 5 | apiVersion: v1 6 | metadata: 7 | name: minesweeper-src 8 | labels: 9 | app: minesweeper 10 | component: minesweeper-src 11 | heritage: {{ .Release.Service | quote }} 12 | release: {{ .Release.Name | quote }} 13 | data: 14 | {{- (.Files.Glob "files/minesweeper/*").AsConfig | nindent 2 }} 15 | {{- (.Files.Glob "files/minesweeper/secrets/*").AsConfig | nindent 2 }} 16 | {{- /* configmap for minesweeper configuration from values */}} 17 | --- 18 | apiVersion: v1 19 | kind: ConfigMap 20 | metadata: 21 | name: minesweeper-config 22 | labels: 23 | app: minesweeper 24 | 
component: minesweeper-config 25 | heritage: {{ .Release.Service }} 26 | release: {{ .Release.Name }} 27 | data: 28 | "minesweeper.json": {{ toJson .Values.minesweeper | quote }} 29 | -------------------------------------------------------------------------------- /paws/templates/minesweeper/daemonset.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{- if .Values.minesweeper.enabled -}} 4 | apiVersion: apps/v1 5 | kind: DaemonSet 6 | metadata: 7 | name: minesweeper 8 | spec: 9 | selector: 10 | matchLabels: 11 | name: minesweeper 12 | release: {{ .Release.Name }} 13 | template: 14 | metadata: 15 | labels: 16 | name: minesweeper 17 | app: binder 18 | component: minesweeper 19 | release: {{ .Release.Name }} 20 | heritage: {{ .Release.Service }} 21 | annotations: 22 | checksum/configmap: {{ include (print $.Template.BasePath "/minesweeper/configmap.yaml") . | sha256sum }} 23 | 24 | spec: 25 | hostPID: true 26 | securityContext: 27 | # run as same uid as user pods to limit privileges 28 | # we may need to run as root in the future 29 | # to access certain fields. 30 | # we could limit further with runAsUser: nobody 31 | # but that takes away some fields we use 32 | runAsUser: {{ .Values.jupyterhub.singleuser.uid }} 33 | # root group gets us read-only access to what we want for now 34 | runAsGroup: 0 35 | tolerations: 36 | - effect: NoSchedule 37 | key: hub.jupyter.org/dedicated 38 | operator: Equal 39 | value: user 40 | - effect: NoSchedule 41 | key: hub.jupyter.org_dedicated 42 | operator: Equal 43 | value: user 44 | serviceAccountName: minesweeper 45 | 46 | {{- with .Values.imagePullSecrets }} 47 | imagePullSecrets: 48 | {{- . | toYaml | nindent 8 }} 49 | {{- end }} 50 | 51 | containers: 52 | - name: minesweeper 53 | image: {{ tpl .Values.minesweeper.image.template . 
| quote}} 54 | securityContext: 55 | # we are running with hostPID, but want extremely limited capabilities 56 | capabilities: 57 | drop: 58 | - all 59 | {{- with .Values.minesweeper.resources }} 60 | resources: {{ toJson . }} 61 | {{- end }} 62 | volumeMounts: 63 | - name: config 64 | mountPath: /etc/minesweeper 65 | readOnly: true 66 | - name: src 67 | mountPath: /srv/minesweeper 68 | readOnly: true 69 | command: 70 | - python3 71 | - /srv/minesweeper/minesweeper.py 72 | env: 73 | - name: NODE_NAME 74 | valueFrom: 75 | fieldRef: 76 | fieldPath: spec.nodeName 77 | - name: NAMESPACE 78 | value: {{ .Release.Namespace }} 79 | terminationGracePeriodSeconds: 0 80 | volumes: 81 | - name: src 82 | configMap: 83 | name: minesweeper-src 84 | - name: config 85 | configMap: 86 | name: minesweeper-config 87 | {{- end }} 88 | -------------------------------------------------------------------------------- /paws/templates/minesweeper/rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: Role 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: minesweeper 6 | rules: 7 | # needs read/exec/logs/delete access 8 | - apiGroups: [""] 9 | resources: ["pods"] 10 | verbs: ["get", "watch", "list", "create", "delete"] 11 | - apiGroups: [""] 12 | resources: ["pods/log"] 13 | verbs: ["get"] 14 | - apiGroups: [""] 15 | resources: ["pods/exec"] 16 | verbs: ["create", "get"] 17 | --- 18 | kind: RoleBinding 19 | apiVersion: rbac.authorization.k8s.io/v1 20 | metadata: 21 | name: minesweeper 22 | subjects: 23 | - kind: ServiceAccount 24 | namespace: {{ .Release.Namespace }} 25 | name: minesweeper 26 | roleRef: 27 | kind: Role 28 | name: minesweeper 29 | apiGroup: rbac.authorization.k8s.io 30 | --- 31 | apiVersion: v1 32 | kind: ServiceAccount 33 | metadata: 34 | name: minesweeper 35 | -------------------------------------------------------------------------------- /paws/templates/nbserve/nginx.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: nbserve-nginx 7 | data: 8 | nginx.conf: | 9 | # Let nginx automatically determine the number of worker processes 10 | # to run. This defaults to number of cores on the host. 11 | worker_processes auto; 12 | 13 | # Do not daemonize - we'll either run this under a supervisor 14 | # ourselves, or jupyterhub will manage the process, restarting 15 | # it when it dies as necessary 16 | daemon off; 17 | 18 | # Set number of connections accepted per worker 19 | events { 20 | worker_connections 768; 21 | } 22 | 23 | # This needs to be in 'main' since otherwise nginx 24 | # will try to write to /var/log/nginx/error.log and fail 25 | # because it does not have permissions 26 | error_log stderr info; 27 | 28 | # We do not really need / care about a pidfile 29 | pid /dev/null; 30 | 31 | http { 32 | sendfile on; 33 | tcp_nopush on; 34 | tcp_nodelay on; 35 | keepalive_timeout 65; 36 | # Some complex notebooks take a long time to render 37 | proxy_read_timeout 180s; 38 | proxy_connect_timeout 180s; 39 | uwsgi_read_timeout 180s; 40 | types_hash_max_size 2048; 41 | # server_tokens off; 42 | 43 | # These are various temp file paths, many that we do not use. 44 | # They are by default set to /var/lib/nginx/*, which causes 45 | # problems when running as non-root, as we are here. So we 46 | # shall set them all to /tmp. FIXME: Find proper paths for 47 | # these somewhere (perhaps on current-dir?) 48 | client_body_temp_path /tmp; 49 | proxy_temp_path /tmp; 50 | fastcgi_temp_path /tmp; 51 | uwsgi_temp_path /tmp; 52 | scgi_temp_path /tmp; 53 | 54 | # access_log does not support 'stderr' directive directly 55 | access_log /dev/stderr; 56 | 57 | # nginx needs an async way to resolve hostnames to IPs, and 58 | # the default `gethostbyname` setup does not allow for this.
59 | # While ideally nginx should parse /etc/resolv.conf itself, 60 | # it does not do so at this time, and needs us to set the DNS 61 | # server explicitly. 62 | # openresty allows a "local=on" which pulls from 63 | # /etc/resolv.conf 64 | 65 | # NOTE: This causes issues when resolving localhost and 66 | # other hostnames traditionally set in /etc/hosts, since 67 | # DNS servers respond erratically to queries for them. 68 | resolver local=on ipv6=off; 69 | 70 | # This is used to support websocket proxying. We need to set 71 | # the 'Upgrade' HTTP header to either 'upgrade' (for websockets) 72 | # or 'close' (for everything else). 73 | # See https://www.nginx.com/resources/admin-guide/reverse-proxy/ 74 | # for more details. 75 | map $http_upgrade $connection_upgrade { 76 | default upgrade; 77 | '' close; 78 | } 79 | 80 | # Shared memory area for caching username to id mappings 81 | lua_shared_dict usernamemapping 16m; 82 | 83 | lua_ssl_trusted_certificate /etc/ssl/certs/ca-certificates.crt; 84 | lua_ssl_verify_depth 10; 85 | 86 | # Serve things with appropriate mimetypes 87 | include /usr/local/openresty/nginx/conf/mime.types; 88 | 89 | # This is the 'regular' server, that sees all public 90 | # traffic and proxies them to the appropriate backend server. 91 | server { 92 | listen 0.0.0.0:8000; 93 | 94 | location ~ \/\. { 95 | deny all; 96 | } 97 | 98 | location = /robots.txt { 99 | alias /var/www/robots.txt; 100 | } 101 | 102 | # No port numbers in redirects 103 | port_in_redirect off; 104 | 105 | location ~ ^/user/([^/]+)/notebooks/(.*)$ { 106 | rewrite /user/([^/]+)/notebooks/(.*)$ /User:$1/$2 permanent; 107 | } 108 | 109 | # Only after the User: redirect! Otherwise our backend can't find the file. 
110 | location ~ ^/\d+/.*\.(rst|md|ipynb)$ { 111 | include /usr/local/openresty/nginx/conf/uwsgi_params; 112 | uwsgi_pass uwsgi://renderer.{{ .Release.Namespace }}.svc.cluster.local:8000; 113 | } 114 | 115 | location / { 116 | index 2987347263023847928376409; 117 | fancyindex on; 118 | 119 | alias /data/project/paws/userhomes/; 120 | } 121 | 122 | 123 | location /accelredir { 124 | internal; 125 | 126 | alias /data/project/paws/userhomes/; 127 | } 128 | 129 | # this section is what allows: 130 | # https://public.hub.paws.local/User:VRook_(WMF)/awesome.ipynb 131 | # to present whatever is at: 132 | # https://public.hub.paws.local/67158682/awesome.ipynb 133 | location /User: { 134 | rewrite_by_lua_block { 135 | local m = ngx.re.match(ngx.var.uri, "/User:([^/]+)(.*)"); 136 | if m then 137 | local userid = ngx.shared.usernamemapping:get(m[1]); 138 | if userid == nil then 139 | local http = require "resty.http"; 140 | local httpc = http.new(); 141 | local apiurl = "https://meta.wikimedia.org/w/api.php?" .. 142 | "action=query&format=json&formatversion=2" .. 143 | "&prop=&list=users&meta=&usprop=centralids" .. 144 | "&ususers=" .. ngx.escape_uri(m[1]); 145 | 146 | local res, err = httpc:request_uri(apiurl); 147 | local cjson = require "cjson"; 148 | local resp_data = cjson.decode(res.body); 149 | 150 | ngx.log(ngx.ERR, res.body); 151 | if resp_data["query"]["users"][1]["missing"] then 152 | ngx.exit(404); 153 | end 154 | userid = resp_data["query"]["users"][1]["centralids"]["CentralAuth"] 155 | 156 | ngx.shared.usernamemapping:set(m[1], userid); 157 | end 158 | ngx.req.set_uri("/" .. userid .. m[2], true, true); 159 | end 160 | } 161 | 162 | proxy_http_version 1.1; 163 | 164 | # This is required for websockets to be proxied correctly 165 | proxy_set_header Upgrade $http_upgrade; 166 | proxy_set_header Connection $connection_upgrade; 167 | 168 | # This is required for the target servers to know what 169 | # exactly the original protocol / URI / Host was. 
170 | proxy_set_header X-Forwarded-Proto $scheme; 171 | proxy_set_header X-Original-URI $request_uri; 172 | proxy_set_header Host $host:$server_port; 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /paws/templates/public.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | {{ if .Values.pawsPublicEnabled }} 4 | apiVersion: v1 5 | kind: ServiceAccount 6 | metadata: 7 | name: paws-public 8 | --- 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | metadata: 12 | labels: 13 | name: nbserve 14 | name: nbserve 15 | spec: 16 | replicas: {{ .Values.pawspublic.nbserve.replicas }} 17 | selector: 18 | matchLabels: 19 | name: nbserve 20 | template: 21 | metadata: 22 | creationTimestamp: null 23 | labels: 24 | name: nbserve 25 | spec: 26 | serviceAccount: paws-public 27 | containers: 28 | - image: {{ tpl .Values.pawspublic.nbserve.image.template . | quote }} 29 | imagePullPolicy: Always 30 | name: nbserve 31 | ports: 32 | - containerPort: 8000 33 | name: nbserve 34 | protocol: TCP 35 | volumeMounts: 36 | - mountPath: /data/project/paws/userhomes 37 | name: pawshomes 38 | - name: nginx-conf 39 | mountPath: /mnt 40 | resources: 41 | requests: 42 | memory: {{ .Values.pawspublic.nbserve.requests.memory }} 43 | cpu: "50m" 44 | 45 | volumes: 46 | {{ if .Values.localdev.enabled }} 47 | - hostPath: 48 | path: /srv/paws/project/paws/userhomes 49 | {{ else }} 50 | - nfs: 51 | {{- with (index .Values.jupyterhub.hub.extraVolumes 0) }} 52 | server: {{ .nfs.server }} 53 | {{- end }} 54 | path: /srv/paws/project/paws/userhomes 55 | {{ end }} 56 | name: pawshomes 57 | - name: nginx-conf 58 | configMap: 59 | name: nbserve-nginx 60 | 61 | --- 62 | 63 | apiVersion: apps/v1 64 | kind: Deployment 65 | metadata: 66 | labels: 67 | name: renderer 68 | name: renderer 69 | spec: 70 | selector: 71 | matchLabels: 72 | name: renderer 73 | template: 74 | metadata: 75 | labels: 76 | name: renderer 
77 | spec: 78 | serviceAccount: paws-public 79 | containers: 80 | - env: 81 | - name: BASE_PATH 82 | value: /data/project/paws/userhomes/ 83 | - name: URL_PREFIX 84 | value: / 85 | image: {{ tpl .Values.pawspublic.renderer.image.template . | quote }} 86 | imagePullPolicy: Always 87 | name: renderer 88 | ports: 89 | - containerPort: 8000 90 | name: nbserve 91 | protocol: TCP 92 | volumeMounts: 93 | - mountPath: /data/project/paws/userhomes 94 | name: pawshomes 95 | resources: 96 | requests: 97 | memory: {{ .Values.pawspublic.renderer.requests.memory }} 98 | cpu: {{ .Values.pawspublic.renderer.requests.cpu }} 99 | limits: 100 | cpu: 2000m 101 | volumes: 102 | {{ if .Values.localdev.enabled }} 103 | - hostPath: 104 | path: /srv/paws/project/paws/userhomes 105 | {{ else }} 106 | - nfs: 107 | {{- with (index .Values.jupyterhub.hub.extraVolumes 0) }} 108 | server: {{ .nfs.server }} 109 | {{- end }} 110 | path: /srv/paws/project/paws/userhomes 111 | {{ end }} 112 | name: pawshomes 113 | --- 114 | apiVersion: autoscaling/v1 115 | kind: HorizontalPodAutoscaler 116 | metadata: 117 | name: renderer 118 | spec: 119 | scaleTargetRef: 120 | apiVersion: apps/v1 121 | kind: Deployment 122 | name: renderer 123 | minReplicas: 1 124 | maxReplicas: 6 125 | targetCPUUtilizationPercentage: 200 126 | --- 127 | apiVersion: v1 128 | kind: Service 129 | metadata: 130 | labels: 131 | name: paws-public 132 | name: paws-public 133 | spec: 134 | ports: 135 | - name: http 136 | port: 8000 137 | protocol: TCP 138 | targetPort: 8000 139 | selector: 140 | name: nbserve 141 | --- 142 | apiVersion: v1 143 | kind: Service 144 | metadata: 145 | labels: 146 | name: renderer 147 | name: renderer 148 | spec: 149 | ports: 150 | - name: http 151 | port: 8000 152 | protocol: TCP 153 | targetPort: 8000 154 | selector: 155 | name: renderer 156 | --- 157 | apiVersion: networking.k8s.io/v1 158 | kind: Ingress 159 | metadata: 160 | labels: 161 | name: paws-public-custom 162 | ingress.paws.wmcloud.org: public 163 
| annotations: 164 | name: paws-public-custom 165 | spec: 166 | ingressClassName: nginx 167 | rules: 168 | - host: {{ .Values.pawspublic.ingress.host | quote }} 169 | http: 170 | paths: 171 | - backend: 172 | service: 173 | name: paws-public 174 | port: 175 | number: 8000 176 | pathType: Prefix 177 | path: / 178 | {{ end }} 179 | -------------------------------------------------------------------------------- /paws/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # pawsPublicEnabled enables the anonymous viewing service for notebooks 4 | pawsPublicEnabled: true 5 | pawspublic: 6 | nbserve: 7 | image: 8 | name: quay.io/wikimedia-paws-prod/nbserve 9 | tag: pr-419 # nbserve tag managed by github actions 10 | # pawspublic.nbserve.image.template safely defines image:tag name in yaml 11 | template: "{{ .Values.pawspublic.nbserve.image.name}}:{{.Values.pawspublic.nbserve.image.tag }}" 12 | replicas: 1 13 | requests: 14 | memory: "20Mi" 15 | renderer: 16 | image: 17 | name: quay.io/wikimedia-paws-prod/renderer 18 | tag: pr-418 # renderer tag managed by github actions 19 | # pawspublic.renderer.image.template safely defines image:tag name in yaml 20 | template: "{{ .Values.pawspublic.renderer.image.name}}:{{.Values.pawspublic.renderer.image.tag }}" 21 | requests: 22 | cpu: "10m" # give a token amount for local dev 23 | memory: "10Mi" 24 | ingress: 25 | host: public.hub.paws.local 26 | paws: 27 | # frontPageEnabled switches the URL path of / to a redirect to paws.ingress.frontRedirect 28 | frontPageEnabled: true 29 | ingress: 30 | # paws.ingress.frontHost should be the domain the URL path of / uses to redirect to docs 31 | frontHost: paws.wmcloud.org 32 | # paws.ingress.frontRedirect should be the destination for URL path of / at paws.ingress.frontHost 33 | frontRedirect: https://wikitech.wikimedia.org/wiki/PAWS 34 | jupyterhub: 35 | prePuller: 36 | containerSecurityContext: 37 | runAsUser: 52771 38 | runAsGroup: 
52771 39 | hook: 40 | containerSecurityContext: 41 | runAsUser: 52771 42 | runAsGroup: 52771 43 | pause: 44 | containerSecurityContext: 45 | runAsUser: 52771 46 | runAsGroup: 52771 47 | proxy: 48 | chp: 49 | resources: 50 | requests: 51 | memory: "200Mi" 52 | cpu: .2 53 | containerSecurityContext: 54 | runAsUser: 52771 55 | runAsGroup: 52771 56 | # jupyterhub.proxy.secretToken is a valid dummy value for development 57 | secretToken: "23f542cc4b1af000e68088f1acc7ca8275a67cf496bae15ead6a79b8c6702597" 58 | service: 59 | nodePorts: 60 | http: 32611 61 | type: NodePort 62 | cull: 63 | timeout: 86400 64 | hub: 65 | config: 66 | # updated auth object for chart version 0.11.0+ this is the local dev values 67 | MWOAuthenticator: 68 | client_id: fea321f1b6b5aed9fa83d5362839cd3d 69 | client_secret: 6b17e5b87ae5ee893f5d4ba8b0e2377c6c0c3fcc 70 | mw_index_url: https://meta.wikimedia.org/w/index.php 71 | JupyterHub: 72 | authenticator_class: mediawiki 73 | db: 74 | # jupyterhub.hub.db values are overridden in Cloud VPS 75 | url: sqlite:// 76 | type: sqlite-pvc 77 | upgrade: true 78 | extraVolumes: 79 | - name: homes 80 | hostPath: 81 | path: /srv/paws/project 82 | - name: dumps 83 | hostPath: 84 | path: /mnt/public/dumps 85 | # Without this, dumps becomes inaccessible and can hang the host 86 | - name: dumps-src1 87 | hostPath: 88 | path: /mnt/nfs/dumps-clouddumps1001.wikimedia.org 89 | type: DirectoryOrCreate 90 | - name: dumps-src2 91 | hostPath: 92 | path: /mnt/nfs/dumps-clouddumps1002.wikimedia.org 93 | type: DirectoryOrCreate 94 | extraVolumeMounts: 95 | - name: homes 96 | mountPath: /data/project 97 | - name: dumps 98 | mountPath: /public/dumps 99 | readOnly: true 100 | - name: dumps-src1 101 | mountPath: /mnt/nfs/dumps-clouddumps1001.wikimedia.org 102 | readOnly: true 103 | - name: dumps-src2 104 | mountPath: /mnt/nfs/dumps-clouddumps1002.wikimedia.org 105 | readOnly: true 106 | extraConfig: 107 | fixLabels: | 108 | def fix_labels(spawner, pod): 109 | del 
pod.metadata.labels['hub.jupyter.org/username'] 110 | return pod 111 | 112 | c.KubeSpawner.modify_pod_hook = fix_labels 113 | 00-myConfig: | 114 | localdev = True 115 | 10-myConfig: | 116 | import hmac 117 | import hashlib 118 | import subprocess 119 | import os 120 | import json 121 | from oauthenticator.mediawiki import MWOAuthenticator 122 | from tornado import gen 123 | 124 | from tornado.escape import url_escape 125 | from tornado.httpclient import AsyncHTTPClient 126 | 127 | 128 | class Auth(MWOAuthenticator): 129 | enable_auth_state = True 130 | def normalize_username(self, username): 131 | return username 132 | 133 | async def refresh_user(self, user, handler=None): 134 | client = AsyncHTTPClient() 135 | try: 136 | response = await client.fetch(f"https://meta.wikimedia.org/w/api.php?action=query&format=json&formatversion=2&meta=globaluserinfo&guiuser={url_escape(user.name)}", 137 | user_agent="PAWS-authenticator/0.1 (https://phabricator.wikimedia.org/tag/paws/)" ) 138 | locked = bool(json.loads(response.body)['query']['globaluserinfo'].get("locked", False)) 139 | if locked: 140 | await user.spawner.stop(now=True) 141 | return False 142 | else: 143 | return True 144 | except Exception as e: 145 | self.log.error(f"Error checking for Wikimedia lock on user {user.name}: {e}") 146 | return False # Notebook cookies keep user logged in 147 | 148 | @gen.coroutine 149 | # more information about where this comes from found here: 150 | # https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html#kubespawner.KubeSpawner.volumes 151 | def pre_spawn_start(self, user, spawner): 152 | auth_state = yield user.get_auth_state() 153 | identity = auth_state['MEDIAWIKI_USER_IDENTITY'] 154 | spawner.environment['ACCESS_KEY'] = auth_state['ACCESS_TOKEN_KEY'] 155 | spawner.environment['ACCESS_SECRET'] = auth_state['ACCESS_TOKEN_SECRET'] 156 | spawner.environment['CLIENT_ID'] = self.client_id 157 | spawner.environment['CLIENT_SECRET'] = self.client_secret 158 | 
spawner.environment['USER'] = identity['username'] 159 | # Set rather than use .extend! 160 | # Since otherwise the volumes list will grow each time 161 | # the spawner stops and starts! 162 | 163 | homedir = '/data/project/paws/userhomes/{}'.format(identity['sub']) 164 | homenfs = '/srv/paws/project/paws/userhomes/{}'.format(identity['sub']) 165 | # Create the homedir so docker doesn't do it as root 166 | os.makedirs(homedir, mode=0o755, exist_ok=True) 167 | if localdev == True: 168 | spawner.volumes = [ 169 | { 170 | 'name': 'home', 171 | 'hostPath': { 'path': homenfs } 172 | }, 173 | { 174 | 'name': 'dumps', 175 | 'hostPath': { 'path': '/public/dumps' } 176 | }, 177 | { 178 | 'name': 'dumps-src1', 179 | 'hostPath': { 'path': '/mnt/nfs/dumps-clouddumps1001.wikimedia.org' } 180 | }, 181 | { 182 | 'name': 'dumps-src2', 183 | 'hostPath': { 'path': '/mnt/nfs/dumps-clouddumps1002.wikimedia.org' } 184 | } 185 | ] 186 | else: 187 | spawner.volumes = [ 188 | { 189 | 'name': 'home', 190 | 'nfs': { 'server': nfs_home, 'path': homenfs } 191 | }, 192 | { 193 | 'name': 'dumps', 194 | 'nfs': { 'server': dumps_src1, 'path': '/' } 195 | }, 196 | { 197 | 'name': 'dumps-src1', 198 | 'nfs': { 'server': dumps_src1, 'path': '/' } 199 | }, 200 | { 201 | 'name': 'dumps-src2', 202 | 'nfs': { 'server': dumps_src2, 'path': '/' } 203 | } 204 | ] 205 | 206 | spawner.volume_mounts = [ 207 | { 208 | 'name': 'home', 209 | 'mountPath': '/home/paws' 210 | }, 211 | { 212 | 'name': 'dumps', 213 | 'mountPath': '/public/dumps/public', 214 | 'readOnly': True 215 | }, 216 | { 217 | 'name': 'dumps-src1', 218 | 'mountPath': '/mnt/nfs/dumps-clouddumps1001.wikimedia.org', 219 | 'readOnly': True 220 | }, 221 | { 222 | 'name': 'dumps-src2', 223 | 'mountPath': '/mnt/nfs/dumps-clouddumps1002.wikimedia.org', 224 | 'readOnly': True 225 | }, 226 | ] 227 | 228 | c.OAuthenticator.admin_users = { 229 | "BDavis (WMF)", 230 | "VRook (WMF)", 231 | "ABorrero (WMF)", 232 | "NSkaggs (WMF)", 233 | "Andrewbogott", 234 | 
"Chicocvenancio" 235 | } 236 | c.OAuthenticator.allow_all = True 237 | c.JupyterHub.authenticator_class = Auth 238 | c.JupyterHub.authenticate_prometheus = False 239 | c.JupyterHub.logo_file = '/srv/jupyterhub/PAWS.svg' 240 | c.JupyterHub.template_vars = { 241 | 'announcement': ('' 242 | 'Welcome to PAWS. ' 243 | 'Please ' 245 | ' report any issues on Phabricator, you can also give feedback here' 246 | '') 247 | } 248 | extraEnv: 249 | USER: tools.paws 250 | JUPYTERHUB_CRYPT_KEY: "4849a4d92a49cdf9a80b49486293e29966c4f02daefa0f5597cf14546bab09f8" 251 | MYSQL_HMAC_KEY: "9a33d49db4bb823e87187a11e4f6296bee41bc35c41dc195634dff440c1870f0" 252 | cookieSecret: 827902ad187337f83adc565dadfb4c095ce1962442aae043ac78948f9b216a8f 253 | podSecurityContext: 254 | fsGroup: 52771 255 | image: 256 | name: quay.io/wikimedia-paws-prod/paws-hub 257 | tag: pr-448 # paws-hub tag managed by github actions 258 | containerSecurityContext: 259 | runAsUser: 52771 260 | resources: 261 | requests: 262 | memory: "200Mi" 263 | cpu: .2 264 | ingress: 265 | enabled: true 266 | hosts: 267 | - hub.paws.local 268 | ingressClassName: "nginx" 269 | # We are not on an autoscaling cluster, so we don't want this 270 | scheduling: 271 | userScheduler: 272 | enabled: false 273 | userPlaceholder: 274 | containerSecurityContext: 275 | runAsUser: 52771 276 | runAsGroup: 52771 277 | singleuser: 278 | cmd: 279 | - jupyterhub-singleuser 280 | fsGid: 52771 281 | image: 282 | name: quay.io/wikimedia-paws-prod/singleuser 283 | tag: pr-486 # singleuser tag managed by github actions 284 | pullPolicy: Always 285 | memory: 286 | guarantee: 0.70G 287 | limit: 3G 288 | cpu: 289 | guarantee: .15 290 | limit: 1 291 | storage: 292 | type: none 293 | uid: 52771 294 | # This must be false or this whole thing cannot work with restrictive PSP 295 | cloudMetadata: 296 | blockWithIptables: false 297 | extraEnv: 298 | HUB_DOMAIN: "hub.paws.local" # Check jupyterhub.ingress.hosts 299 | REFINE_DOMAIN: "*" # Check 
jupyterhub.ingress.hosts 300 | networkPolicy: 301 | egressAllowRules: 302 | privateIPs: true # Allow all connections to private IPs, needed for access to replicas 303 | nonPrivateIPs: false # Block all connections to non-private IPs, except the ones allowed below 304 | egress: 305 | # Allow connections to non-private IPs only for TCP ports 80 and 443 306 | # and for UDP ports 53 (DNS) and 123 (NTP) 307 | - ports: 308 | - protocol: TCP 309 | port: 80 310 | - protocol: TCP 311 | port: 443 312 | - protocol: UDP 313 | port: 53 314 | - protocol: UDP 315 | port: 123 316 | # mysql configures the wiki replica backend variables 317 | mysql: 318 | domain: "svc.cluster.local" 319 | username: s52771 320 | password: "iAmNotSecret0" 321 | minesweeper: 322 | enabled: false # most local-dev testers won't have the key to configs 323 | image: 324 | name: quay.io/wikimedia-paws-prod/minesweeper 325 | tag: pr-433 # minesweeper tag managed by github actions 326 | template: "{{ .Values.minesweeper.image.name }}:{{ .Values.minesweeper.image.tag }}" 327 | # If not deployed for prod use, we use some hacks for testing 328 | localdev: 329 | enabled: true 330 | image: 331 | name: quay.io/wikimedia-paws-prod/jobber 332 | tag: pr-420 # jobber tag managed by github actions 333 | # localdev.image.template safely defines image:tag name in yaml 334 | template: "{{ .Values.localdev.image.name}}:{{.Values.localdev.image.tag }}" 335 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 80 3 | target_version = ['py37'] 4 | -------------------------------------------------------------------------------- /secrets-codfw1dev.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/secrets-codfw1dev.sh 
-------------------------------------------------------------------------------- /secrets-eqiad1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/secrets-eqiad1.sh -------------------------------------------------------------------------------- /tests/helm-lint/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | curl \ 6 | ca-certificates 7 | 8 | COPY . /paws 9 | WORKDIR /paws 10 | 11 | RUN curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash 12 | RUN helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/ 13 | RUN helm repo add bitnami https://charts.bitnami.com/bitnami 14 | RUN helm dependency update paws/ 15 | 16 | 17 | ENTRYPOINT ["helm", "lint", "paws/"] 18 | -------------------------------------------------------------------------------- /tests/tox/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | tox 6 | 7 | COPY . 
/paws 8 | WORKDIR /paws 9 | 10 | 11 | ENTRYPOINT ["tox"] 12 | -------------------------------------------------------------------------------- /tofu/127a.tf: -------------------------------------------------------------------------------- 1 | resource "openstack_containerinfra_cluster_v1" "k8s_127a" { 2 | name = "paws${var.name[var.datacenter]}-127a" 3 | cluster_template_id = resource.openstack_containerinfra_clustertemplate_v1.template_127a.id 4 | master_count = 1 5 | node_count = var.workers[var.datacenter] 6 | } 7 | 8 | resource "local_file" "kube_config" { 9 | content = resource.openstack_containerinfra_cluster_v1.k8s_127a.kubeconfig.raw_config 10 | filename = "kube.config" 11 | } 12 | 13 | resource "openstack_containerinfra_clustertemplate_v1" "template_127a" { 14 | name = "paws${var.name[var.datacenter]}-127a" 15 | coe = "kubernetes" 16 | dns_nameserver = "8.8.8.8" 17 | docker_storage_driver = "overlay2" 18 | docker_volume_size = var.volume_size[var.datacenter] 19 | external_network_id = var.external_network_id[var.datacenter] 20 | fixed_subnet = var.fixed_subnet[var.datacenter] 21 | fixed_network = var.fixed_network[var.datacenter] 22 | flavor = var.worker_flavor[var.datacenter] 23 | floating_ip_enabled = "false" 24 | image = "Fedora-CoreOS-38" 25 | master_flavor = var.control_flavor[var.datacenter] 26 | network_driver = "calico" 27 | 28 | labels = { 29 | kube_tag = "v1.27.8-rancher2" 30 | container_runtime = "containerd" 31 | containerd_version = "1.6.28" 32 | containerd_tarball_sha256 = "f70736e52d61e5ad225f4fd21643b5ca1220013ab8b6c380434caeefb572da9b" 33 | cloud_provider_tag = "v1.27.3" 34 | cinder_csi_plugin_tag = "v1.27.3" 35 | k8s_keystone_auth_tag = "v1.27.3" 36 | magnum_auto_healer_tag = "v1.27.3" 37 | octavia_ingress_controller_tag = "v1.27.3" 38 | calico_tag = "v3.26.4" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /tofu/codfw1dev-backend.conf: 
-------------------------------------------------------------------------------- 1 | # https://github.com/hashicorp/terraform/issues/13022 2 | 3 | region = "codfw1dev" 4 | bucket = "d34805fb49a44a82a0b9668ad1d1227a:tofu-state" 5 | endpoint = "https://object.codfw1dev.wikimediacloud.org" 6 | key = "state/main" 7 | 8 | 9 | skip_region_validation = true 10 | skip_credentials_validation = true 11 | use_path_style = true 12 | -------------------------------------------------------------------------------- /tofu/eqiad1-backend.conf: -------------------------------------------------------------------------------- 1 | # https://github.com/hashicorp/terraform/issues/13022 2 | 3 | region = "eqiad1" 4 | bucket = "paws:tofu-state" 5 | endpoint = "https://object.eqiad1.wikimediacloud.org" 6 | key = "state/main" 7 | 8 | skip_region_validation = true 9 | skip_credentials_validation = true 10 | force_path_style = true 11 | -------------------------------------------------------------------------------- /tofu/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.6.0" 3 | backend "s3" {} 4 | required_providers { 5 | openstack = { 6 | source = "terraform-provider-openstack/openstack" 7 | version = "~> 1.51.0" 8 | } 9 | } 10 | } 11 | 12 | provider "openstack" { 13 | auth_url = var.auth-url[var.datacenter] 14 | tenant_id = var.tenant_id[var.datacenter] 15 | application_credential_id = var.application_credential_id[var.datacenter] 16 | application_credential_secret = var.application_credential_secret[var.datacenter] 17 | } 18 | -------------------------------------------------------------------------------- /tofu/secrets.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toolforge/paws/9c42a38a368b3ad460fd122e60851c67335c3ed2/tofu/secrets.tf -------------------------------------------------------------------------------- /tofu/vars.tf: 
-------------------------------------------------------------------------------- 1 | variable "datacenter" { 2 | type = string 3 | } 4 | 5 | # name codfw1dev artifacts with '-dev' names 6 | variable "name" { 7 | type = map(any) 8 | default = { 9 | "codfw1dev" = "-dev" 10 | "eqiad1" = "" 11 | } 12 | } 13 | 14 | # connection vars 15 | variable "auth-url" { 16 | type = map(any) 17 | default = { 18 | "codfw1dev" = "https://openstack.codfw1dev.wikimediacloud.org:25000" 19 | "eqiad1" = "https://openstack.eqiad1.wikimediacloud.org:25000" 20 | } 21 | } 22 | variable "tenant_id" { 23 | type = map(any) 24 | default = { 25 | "codfw1dev" = "pawsdev" 26 | "eqiad1" = "paws" 27 | } 28 | } 29 | variable "application_credential_id" { 30 | type = map(any) 31 | default = { 32 | "codfw1dev" = "6b404a11241446c7a52c04f39983eda6" 33 | "eqiad1" = "43edf67449c740538af78c1743cb72c3" 34 | } 35 | } 36 | 37 | # magnum vars 38 | variable "worker_flavor" { 39 | type = map(any) 40 | default = { 41 | "codfw1dev" = "g4.cores1.ram2.disk20" 42 | "eqiad1" = "g4.cores8.ram32.disk20" 43 | } 44 | } 45 | variable "control_flavor" { 46 | type = map(any) 47 | default = { 48 | "codfw1dev" = "g4.cores1.ram2.disk20" 49 | "eqiad1" = "g4.cores2.ram4.disk20" 50 | } 51 | } 52 | variable "volume_size" { 53 | type = map(any) 54 | default = { 55 | "codfw1dev" = "20" 56 | "eqiad1" = "80" 57 | } 58 | } 59 | variable "external_network_id" { 60 | type = map(any) 61 | default = { 62 | "codfw1dev" = "wan-transport-codfw" 63 | "eqiad1" = "wan-transport-eqiad" 64 | } 65 | } 66 | variable "fixed_network" { 67 | type = map(any) 68 | default = { 69 | "codfw1dev" = "lan-flat-cloudinstances2b" 70 | "eqiad1" = "lan-flat-cloudinstances2b" 71 | } 72 | } 73 | variable "fixed_subnet" { 74 | type = map(any) 75 | default = { 76 | "codfw1dev" = "cloud-instances2-b-codfw" 77 | "eqiad1" = "cloud-instances2-b-eqiad" 78 | } 79 | } 80 | variable "workers" { 81 | type = map(any) 82 | default = { 83 | "codfw1dev" = "2" 84 | "eqiad1" = "5" 85 | 
} 86 | } 87 | -------------------------------------------------------------------------------- /toolinfo.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "wikimedia-paws", 3 | "title" : "PAWS", 4 | "description" : "PAWS: A Web Shell (PAWS) is a Jupyter notebook deployment hosted by Wikimedia.\n\nA Jupyter notebook is a popular Open Source tool that allows to create and share documents that contain live code. You can use Jupyter notebooks to run scripts that help you perform essential technical tasks on wikis, use data to create visualizations, graphs, and more, and to write techinical documentation and tutorials that help others work on Wikimedia projects.\n\nJupyter notebooks are used both by experienced programmers and technically curious newcomers. There's no need to download software or set up a development environment. All of your work is done in your browser.\n\nOur deployment is using JupyterHub, which can serve live Jupyter notebooks to multiple users.", 5 | "url" : "https://hub.paws.wmcloud.org/", 6 | "keywords" : "paws, pywikibot, jupyter notebook, python", 7 | "author" : "Yuvi Panda", 8 | "repository" : "https://github.com/toolforge/paws", 9 | "for_wikis": ["*"], 10 | "icon": "https://commons.wikimedia.org/wiki/File:PAWS_(no_text).svg", 11 | "license": "MIT", 12 | "sponsor": ["Wikimedia Foundation"], 13 | "available_ui_languages": ["en"], 14 | "technology_used": ["Jupyter Notebook", "pywikibot", "Python 3", "R", "SPARQL"], 15 | "tool_type": "web app", 16 | "developer_docs_url": [ 17 | { 18 | "url": "https://wikitech.wikimedia.org/wiki/PAWS/PAWS_maintenance_and_administration", 19 | "language": "en" 20 | } 21 | ], 22 | "user_docs_url": [ 23 | { 24 | "language": "en", 25 | "url": "https://wikitech.wikimedia.org/wiki/PAWS" 26 | } 27 | ], 28 | "bugtracker_url": "https://phabricator.wikimedia.org/project/board/1648/", 29 | "_language": "en", 30 | "_schema": "/toolinfo/1.2.0" 31 | } 32 | 
-------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = flake8, black, yamllint 3 | minversion = 1.6 4 | skipsdist = True 5 | 6 | [default] 7 | basepython = python3 8 | 9 | [testenv:flake8] 10 | basepython = {[default]basepython} 11 | commands = flake8 images build.py 12 | deps = flake8 13 | 14 | [flake8] 15 | max-line-length = 100 16 | # E121, E123, E126, E133, E226, E241, E242, E704 and W503 are default ignores 17 | # E124: closing bracket does not match visual indentation 18 | # E203: non-pep8 compliant brainfart of flake8 19 | ignore = E121,E123,E126,E133,E203,E226,E241,E242,E704,W503,E124 20 | 21 | [testenv:black] 22 | description = check black formatter 23 | basepython = {[default]basepython} 24 | commands = black -l 80 -t py36 --check \ 25 | images \ 26 | build.py 27 | deps = black 28 | 29 | [testenv:yamllint] 30 | description = check yaml with yamllint 31 | basepython = {[default]basepython} 32 | commands = bash ./tox_scripts/yamllint.sh 33 | deps = yamllint 34 | allowlist_externals = bash 35 | -------------------------------------------------------------------------------- /tox_scripts/yamllint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # yamllint doesn't handle helm templates well, in particular {{- bits, so we 4 | # exclude them with some sed. In order to keep from making a mess of the working 5 | # directory of anyone running this test we copy them to a tmp directory. Then we 6 | # switch to that directory to do the test as it looks more like we are actually 7 | # running in the base dir, rather than displaying things like 8 | # /tmp/tmpdir/paws/values.yaml will show ./paws/values.yaml in output. 9 | 10 | export TEMP_DIR=$(mktemp -d -p "/tmp/") 11 | cp -r . ${TEMP_DIR} 12 | cd ${TEMP_DIR} 13 | find . 
-not -path "./.tox/*" -type f -regex ".*\.ya?ml" -exec sed -i "s/{{/# /" {} \; 14 | yamllint -c .yamllint.conf . 15 | --------------------------------------------------------------------------------