├── .gitattributes ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── initsmnb ├── .ipython └── profile_default │ └── startup │ └── 01-osx-jupyterlab-keys.py ├── QUICKSTART-CDK.md ├── TEMPLATE-setup-my-sagemaker.sh ├── adjust-sm-git.sh ├── change-docker-data-root.sh ├── change-docker-tmp-dir.sh ├── change-jlab-ui.sh ├── duf.sh ├── enable-sm-local-mode.sh ├── ensure-smnb.sh ├── final-check.sh ├── fix-ipython.sh ├── fix-osx-keymap.sh ├── fix-pyspark-smnb.sh ├── init-vim.sh ├── install-cdk.sh ├── install-cli.sh ├── install-code-server.sh ├── install-initsmnb.sh ├── install-pipx.sh ├── mount-efs-accesspoint.sh ├── patch-bash-config.sh ├── patch-jupyter-config.sh ├── restart-docker.sh ├── s5cmd.sh └── upgrade-jupyter.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=input 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.c text 7 | *.h text 8 | 9 | # Declare files that will always have CRLF line endings on checkout. 10 | *.sln text eol=crlf 11 | *.bat text eol=crlf 12 | 13 | # Denote all files that are truly binary and should not be modified. 
14 | *.png binary 15 | *.jpg binary 16 | *.docx binary 17 | *.pptx binary 18 | *.xlsx binary 19 | *.tar.gz binary 20 | *.pickle binary 21 | *.zip binary 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Jupyter notebook's checkpoints 2 | .ipynb_checkpoints 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # MS-Office temporary files 13 | ~$* 14 | 15 | # OSX specific 16 | .DS_Store 17 | 18 | # VIM temporary files 19 | .*.sw[po] 20 | *.sw[po] 21 | 22 | # Python distribution / packaging 23 | MANIFEST 24 | .Python 25 | env/ 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | 73 | # Flask stuffs 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrappy stuffs 78 | .scrapy 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Sphinx documentation 90 | docs/_build/ 91 | 92 | # mkdocs documentation 93 | /site 94 | 95 | # Database 96 | *.db 97 | *.rdb 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Mypy cache 103 | .mypy_cache/ 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # IDE 115 | .c9/ 116 | .idea/ 117 | .spyproject 118 | .spyderproject 119 | .ropeproject 120 | .vscode 121 | 122 | # pyspark 123 | derby.log 124 | metastore_db 125 | spark-warehouse 126 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. 
Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sample Scripts to Customize SageMaker Notebook Instance 2 | 3 | Table of contents: 4 | 5 | - [1. Overview](#1-overview) 6 | - [2. Non-exhaustive list of customizations](#2-non-exhaustive-list-of-customizations) 7 | - [3. Installation](#3-installation) 8 | - [3.1. Installation from github](#31-installation-from-github) 9 | - [3.2. Installation from local source](#32-installation-from-local-source) 10 | - [4. Usage](#4-usage) 11 | - [5. Appendix](#5-appendix) 12 | - [5.1. Restart JupyterLab](#51-restart-jupyterlab) 13 | - [5.2. Change terminal font size](#52-change-terminal-font-size) 14 | - [5.3. Docker Tweaks](#53-docker-tweaks) 15 | - [5.4. Advance Jupyter Lab Tweaks](#54-advance-jupyter-lab-tweaks) 16 | - [6. Related Projects](#6-related-projects) 17 | - [7. Security](#7-security) 18 | - [8. License](#8-license) 19 | - [9. Acknowledgements](#9-acknowledgements) 20 | 21 | ## 1. 
Overview 22 | 23 | This repo contains scripts to re-run common tweaks on a fresh (i.e., newly 24 | created or rebooted) SageMaker **classic** notebook instance, to make the 25 | notebook instance a little bit more ergonomic for prolonged usage. 26 | 27 | After running these scripts your default command-line terminal will go from this: 28 | 29 | before_cli 30 | 31 | To something like this: 32 | 33 | after_cli 34 | 35 | Once installed, every time you access a newly restarted notebook instance, you 36 | just need to perform these three short, simple steps to see and experience the 37 | customizations: 38 | 39 | 1. open a terminal, 40 | 2. run a one-liner command line `~/SageMaker/initsmnb/setup-my-sagemaker.sh`, 41 | 3. [restart the Jupyter process](#51-restart-jupyterlab). 42 | 43 | By supporting a simple one-liner command line, we hope that you can quickly test 44 | this repo as a *data scientist* (with your notebook instance as all that you 45 | need), rather than as an *infrastructure engineer* who typically works with 46 | more sophisticated automation tools or services. 47 | 48 | We hope that you find this repo useful to adopt into your daily work habits. 49 | 50 | ## 2. Non-exhaustive list of customizations 51 | 52 | Please note that tweaks marked with **\[Need sudo\]** only take effect when 53 | your notebook instance enables 54 | [root access for notebook users](https://aws.amazon.com/blogs/machine-learning/control-root-access-to-amazon-sagemaker-notebook-instances/), and 55 | **\[Need internet\]** requires an internet connection. 56 | 57 | - **\[Need sudo & internet\]** [code-server](https://github.com/coder/code-server) and opinionated 58 | extensions for Python data science work. 59 | 60 | - **\[Need sudo & internet\]** Docker: enable SageMaker local mode, and advanced docker customizations. 61 | See [here](#53-docker-tweaks) for details, and how to disable. 
62 | - Please note that you may need to increase your notebook instance's EBS to make sure that the 63 | `~/SageMaker/` has enough space to hold docker images, docker containers, and docker temp files. 64 | 65 | - Jupyter Lab: 66 | - **\[Need sudo & internet\]** Use the newest release of Jupyter Lab (3.x or newer), with advanced 67 | customizations. See [here](#54-advance-jupyter-lab-tweaks) for details, and how to choose to stay 68 | with the stock Jupyter Lab from the notebook instance. 69 | - **\[Need sudo\]** In addition to SageMaker's built-in conda environments, Jupyter Lab also 70 | auto-scans `/home/ec2-user/SageMaker/envs/` for custom conda environments. 71 | 72 | This allows for a "persistent" conda environment under `/home/ec2-user/SageMaker/envs` that 73 | survives instance reboot. 74 | 75 | You can create a new custom conda environment as follows: 76 | `conda create --prefix /home/ec2-user/SageMaker/envs/MY_CUSTOM_ENV_NAME python=3.10 ipykernel`. 77 | Replace the environment name and python version with your choice. Please note that the conda 78 | environment must have the `ipykernel` package installed. Once the environment is created, you may 79 | need to [restart JupyterLab](#51-restart-jupyterlab) before you can see the environment 80 | listed as one of the kernels. 81 | - Reduce font size on Jupyter Lab, and show line numbers on editors. 82 | - **\[Need sudo\]** Terminal defaults to `bash` shell, dark theme, and smaller font. 83 | 84 | - Git: 85 | - Optionally change committer's name and email, which default to `ec2-user` 86 | - git aliases: `git lol`, `git lola`, `git lolc`, and `git lolac` 87 | - New repo (i.e., `git init`) defaults to branch `main` 88 | - `nbdime` for notebook-friendly diffs 89 | 90 | - Terminal: 91 | - `bash` shortcuts: `alt-.`, `alt-b`, `alt-d`, and `alt-f` work even when 92 | connecting from OSX. 
93 | - **\[Need sudo & internet\]** Install command lines: `htop`, `tree`, `dos2unix`, 94 | `dstat`, `tig` (alinux only), `ranger` (the CLI file explorer), 95 | [cookiecutter](https://pypi.org/project/cookiecutter/), 96 | [pre-commit](https://pre-commit.com/), 97 | [s4cmd](https://github.com/bloomreach/s4cmd), 98 | [black-nb](https://github.com/tomcatling/black-nb), 99 | [black](https://github.com/psf/black), 100 | [jupytext](https://github.com/mwouts/jupytext), and 101 | [AWS CDK CLI](https://docs.aws.amazon.com/cdk/v2/guide/home.html). 102 | - `pre-commit` caches of hook repositories survive reboots 103 | - `ranger` is configured to use relative line numbers 104 | - Whenever possible, commands are installed to the persistent area under 105 | `~/SageMaker/.initsmnb.d/`, so that on reboot, the tweaking script can 106 | skip re-installing those commands to speed-up the tweaking time. 107 | 108 | - ipython run from Jupyter Lab's terminal: 109 | - shortcuts: `alt-.`, `alt-b`, `alt-d`, and `alt-f` work even when connecting 110 | from OSX. 111 | - recolor `o.__class__` from dark blue (nearly invisible on the dark theme) to 112 | a more sane color. 113 | 114 | - Some customizations on `vim`: 115 | - Notably, change window navigation shortcuts from `ctrl-w-{h,j,k,l}` to 116 | `ctrl-{h,j,k,l}`. 117 | 118 | Otherwise, `ctrl-w` is used by most browsers on Linux (and Windows?) to 119 | close a browser tab, which renders windows navigation in `vim` unusable. 120 | 121 | - Other opinionated changes; see `init-vim.sh`. 122 | 123 | - **\[Need sudo\]** Optionally mount one or more EFS. 124 | 125 | ## 3. Installation 126 | 127 | This step needs to be done **once** on a newly *created* notebook instance. 128 | 129 | You can choose to have the installation process automatically download the 130 | necessary files from this repo, provided that your SageMaker classic notebook 131 | instance has the necessary network access to this repo. 
132 | 133 | Another choice is to bootstrap this repo into your SageMaker classic notebook 134 | instance, then invoke the install script in its local mode. 135 | 136 | ### 3.1. Installation from github 137 | 138 | Go to the Jupyter Lab on your SageMaker notebook instance. Open a terminal, 139 | then run this command: 140 | 141 | ```bash 142 | curl -sfL \ 143 | https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-customization/main/initsmnb/install-initsmnb.sh \ 144 | | bash -s -- --git-user 'First Last' --git-email 'ab@email.abc' 145 | ``` 146 | 147 | Both the `--git-user 'First Last'` and `--git-email 'ab@email.abc'` arguments are 148 | optional. If you're happy with SageMaker's preset (which uses `ec2-user` as 149 | the committer name), you can drop these two arguments from the install command. 150 | 151 | If you want to auto-mount one or more EFS, install as follows: 152 | 153 | ```bash 154 | curl -sfL \ 155 | https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-customization/main/initsmnb/install-initsmnb.sh \ 156 | | bash -s -- \ 157 | --git-user 'First Last' \ 158 | --git-email 'ab@email.abc' \ 159 | --efs 'fs-123,fsap-123,my_efs_01' \ 160 | --efs 'fs-456,fsap-456,my_efs_02' 161 | ``` 162 | 163 | All mount points will live under `/home/ec2-user/mnt/`. Thus, the above example 164 | will install a script that can mount two EFS: the first one, `fs-123`, will be 165 | mounted as `/home/ec2-user/mnt/my_efs_01/`, while the second one, `fs-456`, will 166 | be mounted as `/home/ec2-user/mnt/my_efs_02/`. 167 | 168 | After the installation step finishes, you should see a new directory created: `/home/ec2-user/SageMaker/initsmnb/`. 169 | Your next step is to jump to section [Usage](#4-usage). 170 | 171 | ### 3.2. 
Installation from local source 172 | 173 | On your SageMaker notebook instance, open a terminal and run these commands: 174 | 175 | ```bash 176 | cd ~/SageMaker 177 | git clone https://github.com/aws-samples/amazon-sagemaker-notebook-instance-customization.git 178 | cd amazon-sagemaker-notebook-instance-customization/initsmnb 179 | ./install-initsmnb.sh --from-local --git-user 'First Last' --git-email 'ab@email.abc' 180 | ``` 181 | 182 | After the installation step finishes, you should see a new directory created: `/home/ec2-user/SageMaker/initsmnb/`. 183 | Your next step is to jump to section [Usage](#4-usage). 184 | 185 | ## 4. Usage 186 | 187 | Once installed, you should see file `/home/ec2-user/SageMaker/initsmnb/setup-my-sagemaker.sh`. 188 | 189 | To apply the customizations to the current session, open a terminal and run 190 | `~/SageMaker/initsmnb/setup-my-sagemaker.sh`. Once the script finishes, please 191 | follow the on-screen instruction to restart the Jupyter server (and after that, 192 | do remember to reload your browser tab). 193 | 194 | Due to how SageMaker notebook works, please re-run `setup-my-sagemaker.sh` on a 195 | newly *started* or *restarted* instance. You may even consider to automate this 196 | step using SageMaker lifecycle config. 197 | 198 | ## 5. Appendix 199 | 200 | ### 5.1. Restart JupyterLab 201 | 202 | On the Jupyter Lab's terminal, run this command: 203 | 204 | ```bash 205 | # For notebook instance with alinux 206 | sudo initctl restart jupyter-server --no-wait 207 | 208 | # Use this instead, for notebook instance with alinux2 209 | sudo systemctl restart jupyter-server 210 | ``` 211 | 212 | After issuing the command, your Jupyter interface will probably freeze, which 213 | is expected. 214 | 215 | Then, reload your browser tab, and enjoy the new experience. 216 | 217 | ### 5.2. Change terminal font size 218 | 219 | To change the terminal font size, after installation 220 | 221 | 1. 
open `/home/ec2-user/SageMaker/initsmnb/change-jlab-ui.sh` in a text editor, 222 | 2. go to the section that customizes the terminal, 223 | 3. then change the fontsize (default is 10) to another value of your choice. 224 | 225 | ### 5.3. Docker Tweaks 226 | 227 | - Enable SageMaker local mode. 228 | 229 | - Relocate docker's data-root to persistent area `~/SageMaker/.initsmnb.d/docker/`, so that after 230 | reboot, your `docker images` won't show empty images anymore (provided you've docker build or pull 231 | before). 232 | 233 | - Relocate docker's tmpdir to persistent area `~/SageMaker/.initsmnb.d/tmp/`, so that you can build 234 | large custom images that require more space than what `/tmp` (i.e., on root volume) provides. 235 | 236 | - A secondary benefit is to allow SageMaker local mode to run with S3 input that's larger than 237 | what `/tmp` (i.e., on root volume) provides. Please note SageMaker local mode will copy the 238 | S3 input to the docker's tmpdir, but upon completion the SDK won't remove the tmp dir. Hence, 239 | you need to manually remove the temporary S3 inputs from the persistent docker's tmpdir. 240 | 241 | Should you choose not to apply the docker tweaks, make sure to pass `--no-config-docker` to the 242 | `install-initsmnb.sh` script. 243 | 244 | ### 5.4. Advance Jupyter Lab Tweaks 245 | 246 | - Upgrade to the latest release of Jupyter Lab. 247 | 248 | - Tree-like file explorer (thanks to 249 | [jupyterlab-unfold](https://github.com/jupyterlab-contrib/jupyterlab-unfold)). 250 | 251 | ![Screenshot courtesy of jupyterlab-unfold](https://raw.githubusercontent.com/jupyterlab-contrib/jupyterlab-unfold/master/images/screenshot.png) 252 | 253 | - Enable code formatting (thanks to 254 | [jupyterlab-code-formatter](https://github.com/ryantam626/jupyterlab_code_formatter)), 255 | [black](https://github.com/psf/black), and [isort](https://github.com/PyCQA/isort). 256 | 257 | - Press `ctrl-shift-b` to reformat a notebook or text editor with `black`. 
258 | - Press `ctrl-shift-i` to reformat a notebook or text editor with `isort`. 259 | - To apply `black` + `isort` at the same time to a notebook, you can also click a button on the 260 | toolbar as shown 261 | [here](https://ryantam626.github.io/jupyterlab_code_formatter/usage.html#for-the-entire-document). 262 | 263 | ![Screenshot courtesy of jupyterlab-code-formatter](https://github.com/ryantam626/jupyterlab_code_formatter/raw/master/docs_src/_static/format-all.gif) 264 | 265 | - Centralize notebook checkpoints to `/tmp/.ipynb_checkpoints/`. This prevents 266 | `.ipynb_checkpoints/` from making its way into the tarballs generated by 267 | SageMaker SDK for training, inference, and framework processing scripts, and 268 | model repack. 269 | 270 | - Disable the git extension for Jupyter Lab. This is aimed at power users who primarily use git 271 | from CLI, and do not want to be distracted by Jupyter Lab's frequent refreshes on the lower-left 272 | status bar. 273 | 274 | Should you choose not to apply these advanced JLab tweaks (hence, continue to use the JLab version 275 | provided by the notebook instance), make sure to pass `--plain-old-jlab` to the 276 | `install-initsmnb.sh` script. 277 | 278 | ## 6. Related Projects 279 | 280 | Once you've customized your development environment on your SageMaker classic 281 | notebook instance, we invite you to explore related samples. 282 | 283 | 1. [aws-samples/python-data-science-template](https://github.com/aws-samples/python-data-science-template/) 284 | shows a one-liner command line that instantaneously auto-generates a modular 285 | structure for your new Python-based data science project. 286 | 287 | 2. [aws-samples/amazon-sagemaker-entrypoint-utilities](https://github.com/aws-samples/amazon-sagemaker-entrypoint-utilities) 288 | is a sample library to help you quickly write a SageMaker **meta**-entrypoint 289 | script for training. 
This approach aims to reduce the amount of boilerplate 290 | codes you need to write for model training, such as argument parsings and 291 | logger configurations, which are repetitive and tedious. 292 | 293 | 3. [ML Max](https://github.com/awslabs/mlmax/) is a set of example templates to 294 | accelerate the delivery of custom ML solutions to production so you can get 295 | started quickly without having to make too many design choices. At present, 296 | it covers four pillars: training pipeline, inference pipeline, development 297 | environment, and data management/ETL. 298 | 299 | 4. Learn about a different mechanism to create custom Jupyter kernel on a SageMaker 300 | classic notebook instance, described in 301 | [aws-samples/aws-sagemaker-custom-jupyter-kernel](https://github.com/aws-samples/aws-sagemaker-custom-jupyter-kernel/). 302 | 303 | 5. *Wearing an "infrastructure engineer" hat* -- when you're ready or allowed to 304 | implement the customizations as a lifecycle configuration for your SageMaker 305 | notebook instance, feel free to further explore these 306 | [examples](https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/). 307 | 308 | 6. [Data science on Amazon EC2 with vim, tmux and zsh](https://github.com/aws-samples/ec2-data-science-vim-tmux-zsh/) 309 | hosts a simple template to set up basic Vim, Tmux, Zsh for the Deep Learning 310 | AMI Amazon Linux 2 for data scientists. 311 | 312 | ## 7. Security 313 | 314 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 315 | 316 | ## 8. License 317 | 318 | This library is licensed under the MIT-0 License. See the LICENSE file. 319 | 320 | ## 9. 
"""Set up key bindings when ipython is started from a JupyterLab terminal.

On OSX, pressing option-<key> in the JupyterLab terminal sends a special
character (e.g. option-f sends "ƒ") instead of an alt/meta chord. This file
maps those characters to the readline-style commands they stand for.

See:
- ipython keybindings: https://ipython.readthedocs.io/en/stable/config/details.html#keyboard-shortcuts
- Named shortcuts: https://github.com/prompt-toolkit/python-prompt-toolkit/blob/master/prompt_toolkit/key_binding/bindings/named_commands.py
"""

from IPython import get_ipython
from prompt_toolkit.enums import DEFAULT_BUFFER
from prompt_toolkit.filters import HasFocus, HasSelection
from prompt_toolkit.key_binding.bindings.named_commands import get_by_name

ip = get_ipython()

# Register the shortcuts only if IPython is using prompt_toolkit, i.e. the
# shell object exposes a non-empty ``pt_app``.
if getattr(ip, 'pt_app', None):
    registry = ip.pt_app.key_bindings

    # Each entry: (character received from the terminal, named command to run).
    # Every binding is active whenever the default input buffer has focus.
    # A stricter alternative that also requires "no active selection" is:
    #   filter=(HasFocus(DEFAULT_BUFFER) & ~HasSelection())
    for _char, _command in (
        ("ƒ", 'forward-word'),   # OSX: option-f
        ("∫", 'backward-word'),  # OSX: option-b
        ("∂", 'kill-word'),      # OSX: option-d
        ("≥", 'yank-last-arg'),  # OSX: option-.
    ):
        registry.add_binding(
            _char,
            filter=HasFocus(DEFAULT_BUFFER)
        )(get_by_name(_command))

    # Do not leave the loop temporaries behind as top-level names.
    del _char, _command
16 | - `$CDK_DEFAULT_ACCOUNT` set to your AWS account ID 17 | - `$CDK_DEFAULT_REGION` set to the region of your SageMaker notebook instance 18 | - `$EC2_AVAIL_ZONE` set to the availability zone of your SageMaker notebook instance 19 | 20 | ## Optional: Prepare helper environment variables 21 | 22 | Run this stanza when you want to override the defaults set by 23 | [initsmnb](https://github.com/aws-samples/amazon-sagemaker-notebook-instance-customization) in your 24 | `~/.bashrc`. 25 | 26 | ```bash 27 | export CDK_DEFAULT_ACCOUNT=111122223333 28 | export CDK_DEFAULT_REGION=ap-southeast-1 29 | ``` 30 | 31 | ## Bootstrap CDK to AWS account 32 | 33 | **Requirements**: make sure the instance where you run the cdk client has the necessary permissions 34 | to create an S3 bucket and deploy CloudFormation stacks. 35 | 36 | For more details, see the [AWS CDK bootstrapping guide](https://docs.aws.amazon.com/cdk/v2/guide/bootstrapping.html). 37 | 38 | ```bash 39 | # Cdk client will create an S3 bucket on account ID xxxxyyyyzzzz. 40 | cdk bootstrap aws://${CDK_DEFAULT_ACCOUNT}/${CDK_DEFAULT_REGION} 41 | ``` 42 | 43 | ## Create a CDK app 44 | 45 | ```bash 46 | mkdir -p ROOT_OF_MY_CDK_APP 47 | cd ROOT_OF_MY_CDK_APP 48 | cdk init app --language python 49 | ``` 50 | 51 | From here, we deviate from the CDK-generated `README.md`: instead of virtualenv, we just use a 52 | Conda environment. 53 | 54 | ```bash 55 | conda create --prefix ~/SageMaker/envs/cdk-conda-env python=3.9 ipykernel 56 | conda activate ~/SageMaker/envs/cdk-conda-env 57 | 58 | pip install -r requirements.txt 59 | pip install -r requirements-dev.txt 60 | ``` 61 | 62 | ## Deploy Stack 63 | 64 | First-time deployment: 65 | 66 | ```bash 67 | cdk synth 68 | cdk deploy 69 | ``` 70 | 71 | Your CDK template may need to create a few IAM roles, so make sure that the AWS credential (e.g., 72 | a SageMaker execution role) that deploys the stack has the necessary permissions to CRUD IAM 73 | permissions and/or roles. 
#!/bin/bash

################################################################################
# 000: Prolog
################################################################################
# Only support alinux2
FLAVOR=$(grep PRETTY_NAME /etc/os-release | cut -d'"' -f 2)
if [[ $FLAVOR != "Amazon Linux 2" ]]; then
    echo "${BASH_SOURCE[0]} does not support alinux instance."
    exit 1
fi

# Utility function to get script's directory (deal with Mac OSX quirkiness).
# This function is ambidextrous as it works on both Linux and OSX.
#
# Output:  the absolute directory of this script on stdout (no trailing newline).
# Returns: 0 on success; non-zero when the required readlink flavor is missing
#          or the script path cannot be resolved.
#
# The function is meant to be called as $(get_bin_dir), i.e. in a subshell, so
# it reports failure with 'return' and leaves the decision to abort to the
# caller: an 'exit' here would only terminate the subshell.
get_bin_dir() {
    local readlink_cmd=readlink
    if [[ $(uname) == 'Darwin' ]]; then
        readlink_cmd=greadlink
        # 'command -v' reports absence via its exit status, which avoids a
        # malformed test expression when the lookup prints nothing.
        if ! command -v greadlink > /dev/null 2>&1; then
            echo '[ERROR] Mac OSX requires greadlink. Install with "brew install coreutils"' >&2
            return 1
        fi
    fi

    # Declare and assign separately, so that a readlink failure is not masked
    # by the exit status of 'local'.
    local script_path
    script_path=$("$readlink_cmd" -f "${BASH_SOURCE[0]}") || return 1
    echo -n "$(dirname "$script_path")"
}

SECONDS=0
# Abort here rather than carry on with an empty BIN_DIR.
BIN_DIR=$(get_bin_dir) || exit 1
CONFIG_DOCKER=1

# Ensure that we run only on a SageMaker classic notebook instance.
"${BIN_DIR}/ensure-smnb.sh" || exit 1


################################################################################
# 010: Dependencies
################################################################################
# Early install aria2c CLI, as it may be required by jobs running in subprocesses.
(
    # Append the setting only when it is not there yet, so that re-running this
    # script does not keep adding duplicate lines to /etc/yum.conf.
    grep -qxF 'max_connections=10' /etc/yum.conf \
        || echo "max_connections=10" | sudo tee -a /etc/yum.conf
    # Lots of problem, from wrong .repo content to broken selinux-container
    sudo rm -f /etc/yum.repos.d/docker-ce.repo
    sudo amazon-linux-extras install -y epel
    sudo yum install -y aria2
)

# Placeholder to store persistent config files
mkdir -p ~/SageMaker/.initsmnb.d

# Hold symlinks of select binaries from the 'base' conda environment, so that
# custom environments don't have to install them, e.g., nbdime, docker-compose.
mkdir -p ~/.local/bin


################################################################################
# 020: Here we go...
################################################################################
# Run each argument as a command, and print one status line per command:
# "INITSMNB SUCCESS <basename>" when it exits with 0, otherwise
# "INITSMNB ERROR <basename>".
#
# Arguments: one or more executable paths (each argument is a single command,
#            without extra arguments).
run_and_track_stat() {
    local cmd
    local basecmd
    for cmd in "$@"; do
        basecmd=$(basename "$cmd")
        # Explicit if/else: the status line depends only on the command's exit
        # status, never on whether the first echo succeeded.
        if "$cmd"; then
            echo "INITSMNB SUCCESS $basecmd"
        else
            echo "INITSMNB ERROR $basecmd"
        fi
    done
}

# Install the CLI tools in the background; stdout and stderr of the whole group
# go to ~/INITSMNB-install-cli.txt.
(
    run_and_track_stat ${BIN_DIR}/install-cli.sh
    run_and_track_stat ${BIN_DIR}/duf.sh
    run_and_track_stat ${BIN_DIR}/s5cmd.sh
) &> ~/INITSMNB-install-cli.txt &

# These require jupyter lab restarted and browser reloaded, to see the changes.
77 | ${BIN_DIR}/patch-jupyter-config.sh 78 | ${BIN_DIR}/change-jlab-ui.sh 79 | 80 | # Disable jupyterlab git extension. For power git users, who don't like to 81 | # be distracted by jlab's frequent status changes on lower-left status bar. 82 | ~/anaconda3/envs/JupyterSystemEnv/bin/jupyter labextension disable '@jupyterlab/git' 83 | ~/anaconda3/envs/JupyterSystemEnv/bin/jupyter labextension disable 'jupyterlab_git' 84 | ~/anaconda3/envs/JupyterSystemEnv/bin/jupyter server extension disable jupyterlab_git 85 | 86 | # To prevent .ipynb_checkpoints/ in the tarball generated by SageMaker SDK 87 | # for training scripts, framework processing scripts, and model repack. 88 | echo "c.FileCheckpoints.checkpoint_dir = '/tmp/.ipynb_checkpoints'" \ 89 | >> ~/.jupyter/jupyter_notebook_config.py 90 | echo "c.FileCheckpoints.checkpoint_dir = '/tmp/.ipynb_checkpoints'" \ 91 | >> ~/.jupyter/jupyter_server_config.py 92 | 93 | run_and_track_stat ${BIN_DIR}/install-pipx.sh &> ~/INITSMNB-install-pipx.txt & 94 | run_and_track_stat ${BIN_DIR}/upgrade-jupyter.sh &> ~/INITSMNB-upgrade-jupyter.txt & 95 | run_and_track_stat ${BIN_DIR}/install-cdk.sh &> ~/INITSMNB-install-cdk.txt & 96 | run_and_track_stat ${BIN_DIR}/install-code-server.sh &> ~/INITSMNB-install-code-server.txt & 97 | 98 | ${BIN_DIR}/fix-osx-keymap.sh 99 | ${BIN_DIR}/patch-bash-config.sh 100 | ${BIN_DIR}/fix-ipython.sh 101 | ${BIN_DIR}/init-vim.sh 102 | ${BIN_DIR}/fix-pyspark-smnb.sh 103 | ${BIN_DIR}/mount-efs-accesspoint.sh fsid,fsapid,mountpoint 104 | 105 | if [[ $CONFIG_DOCKER == 1 ]]; then 106 | # Dances needed before we can start using the SageMaker local mode. 107 | ${BIN_DIR}/enable-sm-local-mode.sh 108 | 109 | # ~/SageMaker EBS can be upsized on demand and survives reboot. Hence, use 110 | # it for images, layers, caches, build temp dirs, etc. 
111 | ${BIN_DIR}/change-docker-data-root.sh 112 | ${BIN_DIR}/change-docker-tmp-dir.sh 113 | 114 | ${BIN_DIR}/restart-docker.sh 115 | fi 116 | 117 | 118 | ################################################################################ 119 | # 030: Wrapping up... 120 | ################################################################################ 121 | # Wait for background jobs to complete. 122 | COLOR_RED="\033[1;31m" 123 | COLOR_OFF="\033[0m" 124 | echo -e " 125 | Waiting for these jobs to complete... 126 | $(jobs) 127 | 128 | They may take ${COLOR_RED}~4 minutes${COLOR_OFF} (on ml.t3.medium in ap-southeast-1 / Singapore). 129 | 130 | Job logs: 131 | $(ls -al ~/INITSMNB-*.txt) 132 | " 133 | wait 134 | 135 | # Improve code-server's UX in dealing with persistent conda environments. 136 | ~/anaconda3/bin/conda config --append envs_dirs ~/SageMaker/envs 137 | 138 | # Free up a bit more space on the ephemeral volume 139 | sudo yum clean packages 140 | rm -fr ~/.cache/{pip,yarn}/ 141 | # This operation turns out to be slow... 142 | #~/anaconda3/condabin/conda clean --all -y 143 | 144 | # Any failed jobs? 145 | echo -e "\nJobs status:" 146 | egrep -e '^INITSMNB SUCCESS|^INITSMNB ERROR' ~/INITSMNB*txt 147 | 148 | # Move it here to ensure delta's presence. 
# Set the global git identity, then report the resulting values.
#
# Arguments:
#   $1 - user name; pass "" to leave the current name untouched
#   $2 - user email; pass "" (or omit) to leave the current email untouched
# Outputs:
#   stdout - the effective global name / email, printed only when at least
#            one of the two was provided.
set_git_user() {
    local user_name="${1:-}"
    local user_email="${2:-}"

    if [[ -n "$user_name" ]]; then
        git config --global user.name "$user_name"
    fi

    if [[ -n "$user_email" ]]; then
        # Quoted so the value always reaches git as exactly one argument.
        git config --global user.email "$user_email"
    fi

    if [[ -n "${user_name}${user_email}" ]]; then
        # Quoted so whitespace inside the configured name is shown verbatim.
        echo "Global Git's name / email = $(git config --global user.name) / $(git config --global user.email)"
        echo You may need to run: git commit --amend --reset-author
    fi
}
37 | git config --global alias.lol "log --graph --format=format:'%C(bold blue)%h%C(reset) - %C(bold green)(%ar)%C(reset) %C(white)%s%C(reset) %C(bold white)— %an%C(reset)%C(bold yellow)%d%C(reset)' --abbrev-commit --date=relative" 38 | #git config --global alias.lola "lol --all" # SageMaker's git does not support alias chain :( 39 | git config --global alias.lola "! git lol --all" 40 | git config --global alias.lolc "! clear; git lol -\$(expr \`tput lines\` '*' 2 / 5)" 41 | git config --global alias.lolac "! clear; git lol --all -\$(expr \`tput lines\` '*' 2 / 5)" 42 | 43 | # Needed when notebook instance is not configured with a code repository. 44 | echo Setup steps for HTTPS connections to AWS CodeCommit repositories 45 | git config --global credential.helper '!aws codecommit credential-helper $@' 46 | git config --global credential.UseHttpPath true 47 | 48 | if command -v delta &> /dev/null ; then 49 | echo "adjust-git.sh: delta is available..." 50 | git config --global core.pager "delta -s" 51 | git config --global interactive.diffFilter "delta -s --color-only" 52 | git config --global delta.navigate "true" 53 | fi 54 | -------------------------------------------------------------------------------- /initsmnb/change-docker-data-root.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p ~/SageMaker/.initsmnb.d/docker/ 4 | 5 | sudo ~ec2-user/anaconda3/bin/python -c " 6 | import json 7 | 8 | with open('/etc/docker/daemon.json') as f: 9 | d = json.load(f) 10 | 11 | d['data-root'] = '/home/ec2-user/SageMaker/.initsmnb.d/docker' 12 | 13 | with open('/etc/docker/daemon.json', 'w') as f: 14 | json.dump(d, f, indent=4) 15 | f.write('\n') 16 | " 17 | -------------------------------------------------------------------------------- /initsmnb/change-docker-tmp-dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FLAVOR=$(grep PRETTY_NAME 
/etc/os-release | cut -d'"' -f 2) 4 | if [[ $FLAVOR != "Amazon Linux 2" ]]; then 5 | echo ${BASH_SOURCE[0]} does not support alinux instance. 6 | exit 1 7 | fi 8 | 9 | # Give docker build a bit more space. E.g., as of Nov'21, building a custom 10 | # image based on the pytorch-1.10 DLC would fail due to exhausted /tmp. 11 | sudo sed -i \ 12 | 's|^\[Service\]$|[Service]\nEnvironment="DOCKER_TMPDIR=/home/ec2-user/SageMaker/.initsmnb.d/tmp"|' \ 13 | /usr/lib/systemd/system/docker.service 14 | -------------------------------------------------------------------------------- /initsmnb/change-jlab-ui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | JUPYTER_CONFIG_ROOT=~/.jupyter/lab/user-settings/\@jupyterlab 3 | 4 | echo "After this script finishes, reload the Jupyter-Lab page in your browser." 5 | 6 | mkdir -p $JUPYTER_CONFIG_ROOT/apputils-extension/ 7 | cat << EOF > $JUPYTER_CONFIG_ROOT/apputils-extension/themes.jupyterlab-settings 8 | { 9 | // Theme 10 | // @jupyterlab/apputils-extension:themes 11 | // Theme manager settings. 12 | // ************************************* 13 | 14 | // Theme CSS Overrides 15 | // Override theme CSS variables by setting key-value pairs here 16 | "overrides": { 17 | "code-font-size": "11px", 18 | "content-font-size1": "13px" 19 | }, 20 | 21 | // Scrollbar Theming 22 | // Enable/disable styling of the application scrollbars 23 | "theme-scrollbars": false 24 | } 25 | EOF 26 | 27 | mkdir -p $JUPYTER_CONFIG_ROOT/terminal-extension/ 28 | cat << EOF > $JUPYTER_CONFIG_ROOT/terminal-extension/plugin.jupyterlab-settings 29 | { 30 | // Terminal 31 | // @jupyterlab/terminal-extension:plugin 32 | // Terminal settings. 33 | // ************************************* 34 | 35 | // Font size 36 | // The font size used to render text. 37 | "fontSize": 11, 38 | 39 | // Theme 40 | // The theme for the terminal. 
41 | "theme": "dark" 42 | } 43 | EOF 44 | 45 | mkdir -p $JUPYTER_CONFIG_ROOT/codemirror-extension/ 46 | cat << EOF > $JUPYTER_CONFIG_ROOT/codemirror-extension/plugin.jupyterlab-settings 47 | { 48 | // CodeMirror 49 | // @jupyterlab/codemirror-extension:plugin 50 | // Text editor settings for all CodeMirror editors. 51 | // ************************************************ 52 | 53 | "defaultConfig": { 54 | "codeFolding": true, 55 | "highlightActiveLine": true, 56 | "highlightTrailingWhitespace": true, 57 | "rulers": [ 58 | 80, 59 | 100 60 | ] 61 | } 62 | } 63 | EOF 64 | 65 | mkdir -p $JUPYTER_CONFIG_ROOT/notebook-extension/ 66 | cat << EOF > $JUPYTER_CONFIG_ROOT/notebook-extension/tracker.jupyterlab-settings 67 | { 68 | // Notebook 69 | // @jupyterlab/notebook-extension:tracker 70 | // Notebook settings. 71 | // ************************************** 72 | 73 | // Code Cell Configuration 74 | // The configuration for all code cells; it will override the CodeMirror default configuration. 75 | "codeCellConfig": { 76 | "lineNumbers": true, 77 | "lineWrap": true 78 | }, 79 | 80 | // Markdown Cell Configuration 81 | // The configuration for all markdown cells; it will override the CodeMirror default configuration. 82 | "markdownCellConfig": { 83 | "lineNumbers": true, 84 | "lineWrap": true 85 | }, 86 | 87 | // Raw Cell Configuration 88 | // The configuration for all raw cells; it will override the CodeMirror default configuration. 
# Print the download URL of the latest duf .rpm for this machine's CPU
# architecture, as published on the GitHub releases of ${GH}.
#
# Globals:
#   GH - GitHub "owner/repo" slug to query
# Outputs:
#   stdout - the URL only (callers capture it with $(...))
#   stderr - a warning when the architecture is not x86_64
latest_download_url() {
    local arch
    # `uname -m` is used instead of the non-portable `uname -i`, which prints
    # "unknown" on many distributions.
    case "$(uname -m)" in
        x86_64|amd64)
            arch=amd64
            ;;
        *)
            # Must go to stderr: anything on stdout becomes part of the URL
            # captured by the caller.
            echo "WARNING: to test that this works on graviton, and the need for more precise condition" >&2
            arch=arm64
            ;;
    esac

    curl --silent "https://api.github.com/repos/${GH}/releases/latest" |       # Get latest release from GitHub api
        grep "\"browser_download_url\": \"https.*/duf_.*_linux_${arch}\.rpm\"" |  # Get download url
        sed -E 's/.*"([^"]+)".*/\1/'                                            # Pluck JSON value
}
# 10 | # # 11 | # Please make sure to run ~/SageMaker/initsmnb/setup-my-sagemaker.sh on # 12 | # a SageMaker classic notebook instance. # 13 | # # 14 | # If you still insists to proceed with customizations, please edit # 15 | # ~/SageMaker/initsmnb/setup-my-sagemaker.sh and disable the relevant # 16 | # check. And when you go this route, you're assumed to be proficient in # 17 | # shell scriptings, and thus, able to navigate your way with the scripts. # 18 | ########################################################################### 19 | 20 | EOF 21 | exit 1 22 | fi 23 | 24 | 25 | if [[ ! -f /etc/opt/ml/sagemaker-notebook-instance-config.json ]]; then 26 | cat << EOF 27 | 28 | ############################################################################# 29 | # Customizations declined. # 30 | # # 31 | # Reason: /etc/opt/ml/sagemaker-notebook-instance-config.json not detected; # 32 | # probably this is not a classic notebook instance. # 33 | # # 34 | # Please make sure to run ~/SageMaker/initsmnb/setup-my-sagemaker.sh on a # 35 | # SageMaker classic notebook instance. # 36 | # # 37 | # If you still insist to proceed with customizations, please edit # 38 | # ~/SageMaker/initsmnb/setup-my-sagemaker.sh and disable the relevant # 39 | # check. And when you go this route, you're assumed to be proficient in # 40 | # shell scriptings, and thus, able to navigate your way with the scripts. # 41 | ############################################################################# 42 | 43 | EOF 44 | exit 2 45 | fi 46 | 47 | exit 0 48 | -------------------------------------------------------------------------------- /initsmnb/final-check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Final remarks on why by default, initsmnb didn't update existing SageMaker packages 4 | COLOR_RED="\033[1;31m" 5 | COLOR_OFF="\033[0m" 6 | echo -e " 7 | ${COLOR_RED}Did not update 100+ SageMaker-provided packages because it takes time. 
8 | ${COLOR_OFF}If you still want to update these packages, run ${COLOR_RED}sudo yum update${COLOR_OFF}." 9 | 10 | # After all customizations applied, do a final check and display next steps 11 | # to have the customizations in-effect. 12 | 13 | FLAVOR=$(grep PRETTY_NAME /etc/os-release | cut -d'"' -f 2) 14 | if [[ $FLAVOR == "Amazon Linux 2" ]]; then 15 | CMD_RESTART="sudo systemctl restart jupyter-server " 16 | else 17 | CMD_RESTART="sudo initctl restart jupyter-server --no-wait" 18 | fi 19 | 20 | cat << EOF 21 | 22 | ############################################################################### 23 | # Customizations applied. Next, follow these steps to see them in-effect. # 24 | # # 25 | # First, restart the Jupyter process: # 26 | # # 27 | # ${CMD_RESTART} # 28 | # # 29 | # After the command, the Jupyter interface will probably freeze, which is # 30 | # expected. # 31 | # # 32 | # Then, refresh your browser tab, and enjoy the new experience. # 33 | EOF 34 | 35 | if [[ -e ~/HOWTO-RUN-CODE-SERVER.txt ]]; then 36 | cat << 'EOF' 37 | # # 38 | # --------------------------------------------------------------------------- # 39 | # NOTES: # 40 | # --------------------------------------------------------------------------- # 41 | # Please refer to file ~/HOWTO-RUN-CODE-SERVER.txt on how to use code-server # 42 | # (i.e., "VS Code in the browser") on this SageMaker notebook instance. # 43 | EOF 44 | 45 | fi 46 | 47 | GIT_VERSION=$(git --version) 48 | if [[ "$GIT_VERSION" < "git version 2.28" ]]; then 49 | cat << 'EOF' 50 | # # 51 | # --------------------------------------------------------------------------- # 52 | # NOTES: # 53 | # --------------------------------------------------------------------------- # 54 | # The git version on this SageMaker classic notebook instance is older than # 55 | # version 2.28, hence "git init" will default to branch "master". 
# 56 | # # 57 | # To initialize a new git repo with default branch "main", run: # 58 | # # 59 | # git init; git checkout -b main # 60 | EOF 61 | fi 62 | 63 | cat << EOF 64 | ############################################################################### 65 | 66 | EOF 67 | -------------------------------------------------------------------------------- /initsmnb/fix-ipython.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Change ipython color scheme on something.__class__ from dark blue (nearly invisible) to a more sane color." 4 | 5 | mkdir -p ~/.ipython/profile_default/ 6 | 7 | cat << 'EOF' >> ~/.ipython/profile_default/ipython_config.py 8 | # See: https://stackoverflow.com/a/48455387 9 | 10 | """ 11 | Syntax highlighting on Input: Change default dark blue for "object.__file__" to 12 | a more readable color, esp. on dark background. 13 | 14 | Find out the correct token type with: 15 | 16 | >>> from pygments.lexers import PythonLexer 17 | >>> list(PythonLexer().get_tokens('os.__class__')) 18 | [(Token.Name, 'os'), 19 | (Token.Operator, '.'), 20 | (Token.Name.Variable.Magic, '__class__'), 21 | (Token.Text, '\n')] 22 | """ 23 | from pygments.token import Name 24 | 25 | c.TerminalInteractiveShell.highlighting_style_overrides = { 26 | Name.Variable: "#B8860B", 27 | Name.Variable.Magic: "#B8860B", # Unclear why certain ipython prefers this 28 | Name.Function: "#6fa8dc", # For IPython 8+ (tone down dark blue for function name) 29 | } 30 | 31 | c.TerminalInteractiveShell.highlight_matching_brackets = True 32 | 33 | 34 | ################################################################################ 35 | """ 36 | Syntax highlighting on traceback: Tone down all dark blues. IPython-8+ has more 37 | dark blue compared to older versions. 
Quick test with the following: 38 | 39 | >>> import asdf 40 | 41 | Unfortunately, `IPython.core.ultratb.VerboseTB.get_records()` hardcodes the 42 | "default" pygments style, and doesn't seem to provide a way to override unlike 43 | what Input provides. Hence, let's directly override pygments. 44 | """ 45 | from pygments.styles.default import DefaultStyle 46 | DefaultStyle.styles = {k: v.replace("#0000FF", "#3d85c6") for k, v in DefaultStyle.styles.items()} 47 | EOF 48 | 49 | 50 | echo "Add ipython keybindings when connecting from OSX" 51 | IPYTHON_STARTUP_DIR=.ipython/profile_default/startup 52 | IPYTHON_STARTUP_CFG=${IPYTHON_STARTUP_DIR}/01-osx-jupyterlab-keys.py 53 | 54 | BIN_DIR=$(dirname "$(readlink -f ${BASH_SOURCE[0]})") 55 | 56 | mkdir -p ~/${IPYTHON_STARTUP_DIR}/ 57 | [[ ! -f ~/${IPYTHON_STARTUP_CFG} ]] && ln -s \ 58 | $BIN_DIR/${IPYTHON_STARTUP_CFG} \ 59 | ~/${IPYTHON_STARTUP_CFG} 60 | -------------------------------------------------------------------------------- /initsmnb/fix-osx-keymap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Generating ~/.inputrc to fix a few bash shortcuts when browser runs on OSX..." 4 | cat << EOF >> ~/.inputrc 5 | # A few bash shortcuts when browser runs on OSX 6 | "ƒ": forward-word 7 | "∫": backward-word 8 | "≥": yank-last-arg 9 | "∂": kill-word 10 | EOF 11 | 12 | echo "Enabling keymap in ~/.bash_profile ..." 13 | cat << EOF >> ~/.bash_profile 14 | 15 | # Fix a few bash shortcuts when browser runs on OSX 16 | bind -f ~/.inputrc 17 | EOF 18 | 19 | echo "Keymap set to deal with OSX quirks." 
# Point ~/.vimrc at the persistent vimrc kept under ~/SageMaker.
#
# Globals:
#   VIMRC - path of the persistent vimrc to link to
# Outputs:
#   stdout - a confirmation message
apply_vim_setting() {
    # vimrc
    # `rm -f` also removes a dangling symlink, which a `[[ -f ... ]]` guard
    # would miss because -f follows the link; a stale link would otherwise
    # make `ln -s` fail with "File exists".
    rm -f ~/.vimrc
    ln -s "${VIMRC}" ~/.vimrc

    echo "Vim initialized"
}
29 | : autocmd BufEnter,FocusGained,InsertLeave * set relativenumber 30 | : autocmd BufLeave,FocusLost,InsertEnter * set norelativenumber 31 | :augroup END 32 | 33 | " Relative number only on focused-windows 34 | autocmd BufEnter,FocusGained,InsertLeave,WinEnter * if &number | set relativenumber | endif 35 | autocmd BufLeave,FocusLost,InsertEnter,WinLeave * if &number | set norelativenumber | endif 36 | 37 | " Remap keys to navigate window aka split screens to ctrl-{h,j,k,l} 38 | " See: https://vi.stackexchange.com/a/3815 39 | " 40 | " Vim defaults to ctrl-w-{h,j,k,l}. However, ctrl-w on Linux (and Windows) 41 | " closes browser tab. 42 | " 43 | " NOTE: ctrl-l was "clear and redraw screen". The later can still be invoked 44 | " with :redr[aw][!] 45 | nmap h 46 | nmap j 47 | nmap k 48 | nmap l 49 | 50 | set laststatus=2 51 | set hlsearch 52 | set colorcolumn=80 53 | set splitbelow 54 | set splitright 55 | 56 | "set cursorline 57 | "set lazyredraw 58 | set nottyfast 59 | 60 | autocmd FileType help setlocal number 61 | 62 | """ Coding style 63 | " Prefer spaces to tabs 64 | set tabstop=4 65 | set shiftwidth=4 66 | set expandtab 67 | set nowrap 68 | set foldmethod=indent 69 | set foldlevel=99 70 | set smartindent 71 | filetype plugin indent on 72 | 73 | """ Shortcuts 74 | map :set paste! 75 | " Use l to toggle display of whitespace 76 | nmap l :set list! 
# Probe whether a command can be run successfully.
#
# Arguments:
#   $@ - the command (with its arguments) to try; all its output is discarded
# Outputs:
#   stdout - "detected" when the command exits 0, "not_detected" otherwise
detect_cmd() {
    # Running the probe as an `if` condition exempts it from `set -e`, so a
    # missing command is reported instead of aborting the shell whenever
    # errexit is active in the calling context.
    if "$@" &> /dev/null; then
        echo "detected"
    else
        echo "not_detected"
    fi
}
incompatible node.js version 70 | echo "CDK version:" $(cdk --version) 71 | -------------------------------------------------------------------------------- /initsmnb/install-cli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | FLAVOR=$(grep PRETTY_NAME /etc/os-release | cut -d'"' -f 2) 6 | grep '^max_connections=' /etc/yum.conf &> /dev/null || echo "max_connections=10" | sudo tee -a /etc/yum.conf 7 | 8 | # Lots of problem, from wrong .repo content to broken selinux-container 9 | sudo rm /etc/yum.repos.d/docker-ce.repo || true 10 | 11 | sudo amazon-linux-extras install -y epel 12 | sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/cyqsimon/el-rust-pkgs/repo/epel-7/cyqsimon-el-rust-pkgs-epel-7.repo 13 | #sudo yum update -y # Disable. It's slow to update 100+ SageMaker-provided packages. 14 | sudo yum install -y htop tree fio dstat dos2unix tig ncdu ripgrep bat git-delta inxi mediainfo git-lfs nvme-cli aria2 15 | echo "alias ncdu='ncdu --color dark'" | sudo tee /etc/profile.d/initsmnb-cli.sh 16 | echo 'export DSTAT_OPTS="-cdngym"' | sudo tee -a /etc/profile.d/initsmnb-cli.sh 17 | 18 | # This nbdime is broken. It crashes with ModuleNotFoundError: jsonschema.protocols. 19 | rm ~/anaconda3/bin/nb{diff,diff-web,dime,merge,merge-web,show} ~/anaconda3/bin/git-nb* || true 20 | hash -r 21 | 22 | # Use the good working nbdime 23 | ln -s ~/anaconda3/envs/JupyterSystemEnv/bin/nb{diff,diff-web,dime,merge,merge-web,show} ~/.local/bin/ || true 24 | ln -s ~/anaconda3/envs/JupyterSystemEnv/bin/git-nb* ~/.local/bin/ || true 25 | ~/.local/bin/nbdime config-git --enable --global 26 | 27 | # pre-commit cache survives reboot (NOTE: can also set $PRE_COMMIT_HOME) 28 | mkdir -p ~/SageMaker/.initsmnb.d/.pre-commit.cache 29 | ln -s ~/SageMaker/.initsmnb.d/.pre-commit.cache ~/.cache/pre-commit || true 30 | 31 | # Catch-up with awscliv2 which has nearly weekly releases. 
32 | aria2c -x5 --dir /tmp -o awscli2.zip https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip 33 | cd /tmp && unzip -o -q /tmp/awscli2.zip 34 | aws/install --update --install-dir ~/SageMaker/.initsmnb.d/aws-cli-v2 --bin-dir ~/SageMaker/.initsmnb.d/bin 35 | sudo ln -s ~/SageMaker/.initsmnb.d/bin/aws /usr/local/bin/aws2 || true 36 | rm /tmp/awscli2.zip 37 | rm -fr /tmp/aws/ 38 | # Borrow these settings from aws-samples hpc repo 39 | aws configure set default.s3.max_concurrent_requests 100 40 | aws configure set default.s3.max_queue_size 10000 41 | aws configure set default.s3.multipart_threshold 64MB 42 | aws configure set default.s3.multipart_chunksize 16MB 43 | aws configure set default.cli_auto_prompt on-partial 44 | -------------------------------------------------------------------------------- /initsmnb/install-code-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | install_code_server() { 6 | #local LATEST_DOWNLOAD_URL=$( 7 | # curl --silent "https://api.github.com/repos/coder/code-server/releases/latest" | # Get latest release from GitHub api 8 | # grep "\"browser_download_url\": \"https.*\/code-server-.*-amd64.rpm" | # Get download url 9 | # sed -E 's/.*"([^"]+)".*/\1/' # Pluck JSON value 10 | #) 11 | # 12 | #local RPM=${LATEST_DOWNLOAD_URL##*/} 13 | 14 | # Pin to latest known version that can still work on alinux2 (glibc-2.26) 15 | local VERSION=4.16.1 16 | local LATEST_DOWNLOAD_URL="https://github.com/coder/code-server/releases/download/v${VERSION}/code-server-${VERSION}-amd64.rpm" 17 | local RPM="code-server_${VERSION}_amd64.rpm" 18 | 19 | aria2c -x10 --dir /tmp -o ${RPM} ${LATEST_DOWNLOAD_URL} 20 | sudo yum localinstall -y /tmp/$RPM && rm /tmp/$RPM 21 | [[ -d ~/.cache/code-server/ ]] && rm -fr ~/.cache/code-server/ 22 | 23 | mkdir -p ~/SageMaker/.initsmnb.d/code-server/ 24 | cat << 'EOF' > ~/SageMaker/.initsmnb.d/code-server/config.yaml 25 | # Auto-generated 
# Install the curated set of extensions into code-server with one invocation.
#
# Some extensions cannot be installed on code-server, which is expected.
# See: https://github.com/coder/code-server/discussions/2345.
install_ext() {
    # [20221023] https://github.com/verdimrc/linuxcfg/blob/main/vscode/extensions.json
    local -a extensions=(
        "adpyke.vscode-sql-formatter"
        "bierner.markdown-mermaid"
        "bungcip.better-toml"
        "charliermarsh.ruff"
        "christian-kohler.path-intellisense"
        "DavidAnson.vscode-markdownlint"
        "donjayamanne.githistory"
        "donjayamanne.python-environment-manager"
        "EditorConfig.EditorConfig"
        "emilast.LogFileHighlighter"
        "Gruntfuggly.todo-tree"
        "IBM.output-colorizer"
        "leonhard-s.python-sphinx-highlight"
        "mechatroner.rainbow-csv"
        "mhutchie.git-graph"
        "mikestead.dotenv"
        "ms-python.black-formatter"
        # "ms-python.isort"
        "ms-python.python"
        "ms-toolsai.jupyter"
        "ms-toolsai.vscode-jupyter-powertoys"
        "ms-vscode.live-server"
        # "ms-vscode-remote.remote-ssh"
        # "ms-vscode-remote.remote-ssh-edit"
        "njpwerner.autodocstring"
        "redhat.vscode-yaml"
        "shardulm94.trailing-spaces"
        "stkb.rewrap"
        "tomoki1207.pdf"
        # "usernamehw.errorlens"
        "VisualStudioExptTeam.vscodeintellicode"
        "yzhang.markdown-all-in-one"
    )

    # printf re-applies its format once per extension id, yielding the
    # flag/value pairs line by line; mapfile collects them into the argv.
    local -a cli_args=()
    mapfile -t cli_args < <(printf -- '--install-extension\n%s\n' "${extensions[@]}")

    code-server "${cli_args[@]}"
}
~/SageMaker/.initsmnb.d/code-server/{Machine,User}/ 85 | 86 | # [20221023] https://github.com/verdimrc/linuxcfg/blob/main/vscode/keybindings.json 87 | cat << 'EOF' > ~/SageMaker/.initsmnb.d/code-server/User/keybindings.json 88 | // Place your key bindings in this file to override the defaults 89 | [ 90 | // Since vscode-1.44, OSX + iTerm2 needs this new setting to pass 91 | // alt-backspace correctly to bash. 92 | { 93 | "key": "alt+backspace", 94 | "command": "deleteWordPartLeft", 95 | "when": "terminalFocus && isMac" 96 | }, 97 | // Re-assign ctrl-f in terminal, to remove conflict with vim (Linux) 98 | { 99 | "key": "ctrl+shift+f", 100 | "command": "workbench.action.terminal.focusFind", 101 | "when": "terminalFindFocused || terminalFocus" 102 | }, 103 | { 104 | "key": "ctrl+f", 105 | "command": "-workbench.action.terminal.focusFind", 106 | "when": "terminalFindFocused || terminalFocus" 107 | }, 108 | 109 | // Remove alt-w (Linux), since my zsh remaps alt-w to backward-kill-dir 110 | // TODO: figure out the correct "when", rather then disable at all. 111 | { 112 | "key": "alt+w", 113 | "command": "-workbench.action.terminal.toggleFindWholeWord" 114 | } 115 | ] 116 | EOF 117 | 118 | cat << 'EOF' > ~/SageMaker/.initsmnb.d/code-server/Machine/settings.json 119 | { 120 | // Python paths. 121 | // NOTE: if you change "python.defaultInterpreterPath", then on code-server UI: 122 | // 1. press ctrl+shift+p 123 | // 2. type or select "Jupyter: Select Interpreter to Start Jupyter Server", then press Enter 124 | // 3. 
choose "Python 3.x.x ('JupyterSystemEnv') ~/anaconda3/envs/JupyterSystemEnv/bin/python" 125 | "python.defaultInterpreterPath": "/home/ec2-user/anaconda3/envs/JupyterSystemEnv/bin/python" 126 | } 127 | EOF 128 | 129 | # [20221023] https://github.com/verdimrc/linuxcfg/blob/main/vscode/settings.json 130 | # 131 | # Notable changes: 132 | # - "python.languageServer" from "Pylance" to "Jedi" (https://github.com/coder/code-server/issues/1938) 133 | cat << 'EOF' > ~/SageMaker/.initsmnb.d/code-server/User/settings.json 134 | { 135 | /////////////////////////////////////////////////////////////////////////// 136 | // initsmnb specific 137 | /////////////////////////////////////////////////////////////////////////// 138 | "workbench.colorTheme": "Default Dark+", 139 | "terminal.integrated.shell.linux": "/bin/bash", 140 | 141 | 142 | /////////////////////////////////////////////////////////////////////////// 143 | // OFF TELEMETRIES (LIFTED TO NEAR TOP FOR VISIBILITY 144 | /////////////////////////////////////////////////////////////////////////// 145 | 146 | // Global telemetries 147 | "telemetry.enableCrashReporter": false, // Deprecated since v1.61 Oct'21 148 | "telemetry.enableTelemetry": false, // Deprecated since v1.61 Oct'21 149 | "telemetry.telemetryLevel": "off", // Since v1.61 Oct'21 150 | "python.experiments.enabled": false, 151 | "workbench.enableExperiments": false, 152 | 153 | // Extensions telemetries 154 | "aws.telemetry": false, 155 | "gitlens.advanced.telemetry.enabled": false, 156 | "redhat.telemetry.enabled": false, 157 | 158 | 159 | /////////////////////////////////////////////////////////////////////////// 160 | // EXPERIMENTAL OR PREVIEW FEATURES 161 | /////////////////////////////////////////////////////////////////////////// 162 | "markdown.experimental.updateLinksOnFileMove.enabled": "always", // aug22 163 | 164 | 165 | /////////////////////////////////////////////////////////////////////////// 166 | // HYPER-PERSONALIZED STANZA 167 | 
/////////////////////////////////////////////////////////////////////////// 168 | "terminal.integrated.env.linux": { 169 | "PYTHONPATH": "${workspaceFolder}/src:${env:PYTHONPATH}" 170 | }, 171 | 172 | // To use binaries outside of selected python interpreter, e.g., those installed by pipx 173 | //"python.linting.flake8Path": "/usr/local/bin/flake8", 174 | //"python.linting.mypyPath": "/usr/local/bin/mypy", 175 | //"python.linting.pydocstylePath": "/home/verdi/.local/bin/pydocstyle", 176 | 177 | // GUI: vscode 178 | "window.zoomLevel": 0, 179 | //"editor.fontFamily": "Monego", 180 | "debug.console.fontSize": 11, 181 | "editor.fontSize": 11, 182 | //"editor.lineHeight": 18, 183 | "terminal.integrated.fontSize": 11, 184 | "extensions.ignoreRecommendations": true, // Default to true; set false if you prefer. 185 | "workbench.colorCustomizations": { 186 | "editor.lineHighlightBackground": "#2d2d2d" // For dark theme 187 | }, 188 | // "window.titleBarStyle": "custom", 189 | // GUI: extensions 190 | "markdown.preview.fontSize": 16, // yzhang.markdown-all-in-one 191 | 192 | 193 | /////////////////////////////////////////////////////////////////////////// 194 | // OTHER CONFIGS 195 | /////////////////////////////////////////////////////////////////////////// 196 | 197 | // File exclusions 198 | "files.exclude": { 199 | "**/.git": true, 200 | "**/.svn": true, 201 | "**/.hg": true, 202 | "**/CVS": true, 203 | "**/.DS_Store": true, 204 | "**/._*": true, 205 | "**/__pycache__": true, 206 | "**/.ipynb_checkpoints": true, 207 | "**/.*_cache": true, 208 | "**/.tox": true 209 | }, 210 | "files.watcherExclude": { 211 | "**/.git/objects/**": true, 212 | "**/.git/subtree-cache/**": true, 213 | "**/node_modules/*/**": true, 214 | "**/._*/**": true, 215 | "**/__pycache__/**": true, 216 | "**/.ipynb_checkpoints/**": true, 217 | "**/.*_cache/**": true, 218 | "**/.tox/**": true 219 | }, 220 | 221 | "files.associations": { 222 | "Dockerfile.cpu": "dockerfile", 223 | "Dockerfile.gpu": 
"dockerfile" 224 | }, 225 | 226 | // Javascript 227 | "[javascript]": { 228 | "editor.defaultFormatter": "esbenp.pettier-vscode", 229 | "editor.codeActionsOnSave": { 230 | // "source.fixAll.eslint": false, 231 | "source.fixAll": false // Don't like. Too aggressive during dev. 232 | } 233 | }, 234 | 235 | // Python 236 | "debug.allowBreakpointsEverywhere": true, 237 | "jupyter.askForKernelRestart": false, 238 | "jupyter.debugJustMyCode": false, 239 | "jupyter.disableJupyterAutoStart": true, 240 | "jupyter.magicCommandsAsComments": true, 241 | "jupyter.sendSelectionToInteractiveWindow": true, 242 | "notebook.lineNumbers": "on", 243 | "python.formatting.provider": "black", 244 | "[python]": { 245 | "editor.defaultFormatter": "ms-python.black-formatter", 246 | "editor.codeActionsOnSave": { 247 | "source.fixAll": false // Don't like. Import can be removed unpredictably. 248 | } 249 | }, 250 | "python.analysis.inlayHints.variableTypes": true, 251 | "python.analysis.inlayHints.functionReturnTypes": true, 252 | "python.languageServer": "Jedi", 253 | "python.linting.enabled": true, 254 | "python.linting.flake8Enabled": true, 255 | "python.linting.mypyEnabled": true, 256 | "python.linting.mypyArgs": [ "--show-error-codes" ], 257 | "python.linting.pydocstyleEnabled": false, 258 | "python.showStartPage": false, 259 | "python.testing.pytestEnabled": true, 260 | "python.testing.unittestEnabled": false, 261 | 262 | // Workbench & editor 263 | "breadcrumbs.enabled": true, 264 | "editor.bracketPairColorization.enabled": true, 265 | "editor.formatOnSave": true, 266 | //"editor.formatOnSaveMode": "modifications", //Not supported with Black 267 | "editor.codeActionsOnSave": { 268 | "source.fixAll.markdownlint": true, // Extension davidanson.vscode-markdownlint 269 | "source.organizeImports": true 270 | }, 271 | "editor.minimap.enabled": false, 272 | "editor.parameterHints.cycle": true, 273 | "editor.renderLineHighlight": "all", 274 | //"editor.renderWhitespace": "trailing", 275 | 
"editor.renderControlCharacters": false, 276 | "editor.rulers": [ 277 | 80, 278 | 100 279 | ], 280 | "editor.stickyScroll.enabled": true, 281 | "editor.stickyTabStops": true, 282 | "editor.suggestSelection": "first", 283 | "editor.wordWrapColumn": 100, 284 | "explorer.confirmDelete": false, 285 | "explorer.confirmDragAndDrop": false, 286 | "explorer.openEditors.visible": 0, 287 | "files.eol": "\n", 288 | "git.terminalAuthentication": false, 289 | "scm.diffDecorationsGutterPattern": { 290 | "added": true 291 | }, 292 | "json.format.keepLines": true, 293 | "outline.showVariables": false, 294 | "terminal.integrated.enableMultiLinePasteWarning": false, 295 | "terminal.integrated.lineHeight": 1.2, 296 | "terminal.integrated.localEchoLatencyThreshold": -1, 297 | "terminal.integrated.persistentSessionReviveProcess": "never", 298 | "terminal.integrated.enablePersistentSessions": false, 299 | "terminal.integrated.shellIntegration.enabled": true, // Jul22 default is on 300 | "workbench.editor.decorations.colors": true, 301 | "workbench.editor.decorations.badges": true, 302 | // "workbench.editor.enablePreviewFromCodeNavigation": true, 303 | "workbench.reduceMotion": "on", 304 | "workbench.startupEditor": "none", 305 | "workbench.tree.indent": 20, 306 | 307 | // Shortcuts in integrated terminal 308 | "terminal.integrated.allowChords": false, // send ctrl+k to integrated terminal 309 | "terminal.integrated.commandsToSkipShell": [ 310 | // Linux: send ctrl+e to integrated terminal 311 | "-workbench.action.quickOpen", 312 | 313 | // alt-backspace to behave like in Bash 314 | "-workbench.action.terminal.deleteWordLeft" 315 | ], 316 | //"terminal.integrated.macOptionIsMeta": true, // Allow alt-{f,b,.} in integrated terminal. 
See https://github.com/Microsoft/vscode/issues/11314 317 | "terminal.integrated.sendKeybindingsToShell": true, 318 | "terminal.integrated.showExitAlert": false, 319 | 320 | // Extensions 321 | "autoDocstring.docstringFormat": "google", 322 | "aws.samcli.location": "/usr/local/bin/sam", 323 | "aws.profile": "default", 324 | "gitlens.hovers.currentLine.over": "line", 325 | // "markdown.extension.preview.autoShowPreviewToSide": true, 326 | "[markdown]": { 327 | "editor.defaultFormatter": "yzhang.markdown-all-in-one" 328 | }, 329 | "rewrap.wrappingColumn": 100, 330 | "sql-formatter.uppercase": true, 331 | "todo-tree.general.tags": [ 332 | "BUG", 333 | "HACK", 334 | "FIXME", 335 | "TODO", 336 | "XXX", 337 | "[ ]", 338 | "[x]" 339 | ], 340 | "todo-tree.highlights.customHighlight": { 341 | "TODO": { 342 | "foreground": "yellow" 343 | }, 344 | "FIXME": { 345 | "foreground": "red" 346 | } 347 | }, 348 | "todo-tree.highlights.defaultHighlight": { 349 | "opacity": 0, 350 | "fontStyle": "italic", 351 | "fontWeight": "bold" 352 | }, 353 | "todo-tree.regex.regex": "(//|#|