├── .github └── workflows │ └── docs_build_and_deploy.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── docs ├── Makefile ├── make.bat ├── requirements.txt └── source ├── _static ├── code-blocks-note.md ├── css │ └── custom.css ├── dark-logo-gatsby.png ├── dark-logo-swc.png ├── dark-logo-ucl.png ├── gin-clipboard.png ├── gin-privacy-settings.png ├── light-logo-gatsby.png ├── light-logo-swc.png ├── light-logo-ucl.png ├── logo_dark.png ├── logo_light.png ├── mermaid_config.json ├── ssh_flowchart_full.mmd ├── ssh_flowchart_full.png ├── ssh_flowchart_unmanaged.mmd ├── ssh_flowchart_unmanaged.png └── swc-wiki-warning.md ├── _templates ├── footer_end.html └── footer_start.html ├── conf.py ├── data_analysis ├── HPC-module-SLEAP.md └── index.md ├── electrophysiology ├── community.md ├── example_pipelines │ ├── examples │ │ ├── README.rst │ │ ├── notes-on-docs-structure.txt │ │ └── sara_mederos.py │ ├── index.md │ └── matlab_examples.rst ├── index.md └── resources.md ├── index.md ├── open_science ├── Data-sharing.md ├── GIN-repositories.md ├── Licensing.md └── index.md └── programming ├── Cookiecutter-cruft.md ├── Mount-ceph-ubuntu-temp.md ├── Mount-ceph-ubuntu.md ├── SLURM-arguments.md ├── SSH-SWC-cluster.md ├── SSH-vscode.md ├── Troubleshooting.md ├── index.md └── vscode-with-slurm-job.md /.github/workflows/docs_build_and_deploy.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | # Generate the documentation on all merges to main, all pull requests, or by 4 | # manual workflow dispatch. The build job can be used as a CI check that the 5 | # docs still build successfully. The deploy job only runs when merging 6 | # to main and actually moves the generated html to the gh-pages branch 7 | # (which triggers a GitHub pages deployment). 
8 | on: 9 | push: 10 | branches: 11 | - main 12 | tags: 13 | - '*' 14 | pull_request: 15 | workflow_dispatch: 16 | 17 | jobs: 18 | linting: 19 | # scheduled workflows should not run on forks 20 | if: (${{ github.event_name == 'schedule' }} && ${{ github.repository_owner == 'neuroinformatics-unit' }} && ${{ github.ref == 'refs/heads/main' }}) || (${{ github.event_name != 'schedule' }}) 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: neuroinformatics-unit/actions/lint@v2 24 | 25 | build_sphinx_docs: 26 | name: Build Sphinx Docs 27 | needs: linting 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: neuroinformatics-unit/actions/build_sphinx_docs@v2 31 | with: 32 | python-version: 3.12 33 | 34 | deploy_sphinx_docs: 35 | name: Deploy Sphinx Docs 36 | needs: build_sphinx_docs 37 | permissions: 38 | contents: write 39 | if: github.event_name == 'push' && github.ref_name == 'main' 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: neuroinformatics-unit/actions/deploy_sphinx_docs@v2 43 | with: 44 | secret_input: ${{ secrets.GITHUB_TOKEN }} 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | sg_execution_times.rst 2 | auto_examples_ephys 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask instance folder 60 | instance/ 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # MkDocs documentation 66 | /site/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Pycharm and VSCode 72 | .idea/ 73 | venv/ 74 | .vscode/ 75 | 76 | # IPython Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # OS 83 | .DS_Store 84 | 85 | # written by setuptools_scm 86 | **/_version.py 87 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.3.0 4 | hooks: 5 | - id: check-docstring-first 6 | - id: check-executables-have-shebangs 7 | - id: check-merge-conflict 8 | - id: check-toml 9 | - id: end-of-file-fixer 10 | - id: mixed-line-ending 11 | args: [--fix=lf] 12 | - id: requirements-txt-fixer 13 | - id: trailing-whitespace 14 | - repo: https://github.com/codespell-project/codespell 15 | rev: v2.2.5 16 | hooks: 17 | - id: codespell 18 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute to this website 2 | 3 | ## Website structure 4 | The website is structured in three high-level sections, represented by folders in the `docs/source` directory: 5 | - Data Analysis 6 | - Programming 7 | - Open Science 8 | 9 | Each section directory (`docs/source/
<section_name>`) may contain several markdown files, with each file corresponding to a specific long-form guide. There are also `docs/source/
<section_name>/index.md` files, which are used to create the tables of contents for each section. 10 | 11 | ## Adding a long-form guide 12 | To add a new guide, create a new markdown file in the appropriate section directory, and make sure to start it with a level-1 heading. Remember to also add the new file to the table of contents in the corresponding `docs/source/
/index.md` file. 13 | 14 | ## Adding a small tip 15 | If the content you want to add is not long enough to warrant a full guide, for example a small tip or a quick solution to a common problem, you can add it to a `Troubleshooting.md` file in the appropriate section directory. For an example see `docs/source/programming/Troubleshooting.md`. Each small tip should start with a level-2 heading. 16 | 17 | > **warning** 18 | > 19 | > Since the website is already named "HowTo", please avoid starting your guides/tips with "How to ...". 20 | > For example, instead of "How to detach a forked repo on GitHub", use "Detach a forked repo on GitHub". 21 | 22 | ## GitHub workflow 23 | * Clone the GitHub repository, and create your `new_branch`. 24 | * Edit the website and commit your changes to the `new_branch`. 25 | * Push the `new_branch` to GitHub and create a draft pull request. This will automatically trigger a [GitHub action](https://github.com/neuroinformatics-unit/actions/tree/main/build_sphinx_docs) that checks if the website still builds correctly. 26 | * If the checks pass, mark the pull request as ready to review assign someone to review your changes. 27 | * When the reviewer merges your changes into the `main` branch, a different [GitHub action](https://github.com/neuroinformatics-unit/actions/tree/main/deploy_sphinx_docs) will be triggered, which will build the website and publish it to the `gh-pages` branch. 28 | * The updated website should be available at [howto.neuroinformatics.dev](https://howto.neuroinformatics.dev) 29 | 30 | > **note** 31 | > 32 | > If you wish to view the website locally, before you push it, you can do so by running the following commands from the root of the repository. 33 | > ```bash 34 | > # First time only 35 | > pip install -r docs/requirements.txt 36 | > sphinx-build docs/source docs/build 37 | > 38 | > # Every time you want to update the local build 39 | > rm -rf docs/build && sphinx-build docs/source docs/build 40 | >``` 41 | >You can view the local build at `docs/build/index.html` 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 
23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. 
Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. 
The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. 
indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 
311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. 
Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 396 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neuroinformatics Unit HowTo 2 | 3 | [![Website shields.io](https://img.shields.io/website-up-down-green-red/https/howto.neuroinformatics.dev.svg)](https://howto.neuroinformatics.dev) 4 | [![made-with-sphinx-doc](https://img.shields.io/badge/Made%20with-Sphinx-1f425f.svg)](https://www.sphinx-doc.org/) 5 | 6 | Source code for the [Neuroinformatics Unit HowTo](https://howto.neuroinformatics.dev/) website. 7 | 8 | On this website, we keep track of long-form how-to guides as well as collections of small tips and tricks related to data analysis and software development in neuroscience. 9 | 10 | The information is primarily aimed at researchers at the [Sainsbury Wellcome Centre (SWC)](https://www.sainsburywellcome.org/web/) and [Gatsby Computational Neuroscience Unit (GCNU)](https://www.ucl.ac.uk/gatsby/gatsby-computational-neuroscience-unit), though most of it should be useful to neuroscientists in general. 11 | 12 | If you wish to edit the website, please read the [contributing guide](CONTRIBUTING.md) first. 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | linkify-it-py 2 | matplotlib 3 | myst-parser 4 | nbsphinx 5 | numpydoc 6 | pydata-sphinx-theme 7 | 8 | sphinx 9 | sphinx-copybutton 10 | sphinx-design 11 | sphinx-gallery 12 | -------------------------------------------------------------------------------- /docs/source/_static/code-blocks-note.md: -------------------------------------------------------------------------------- 1 | :::{dropdown} Interpreting code blocks 2 | :color: info 3 | :icon: info 4 | 5 | Shell commands will be shown in code blocks like this 6 | (with the `$` sign indicating the shell prompt): 7 | ```{code-block} console 8 | $ echo "Hello world!" 9 | ``` 10 | 11 | Similarly, Python code blocks will appear with the `>>>` sign indicating the 12 | Python interpreter prompt: 13 | ```{code-block} pycon 14 | >>> print("Hello world!") 15 | ``` 16 | 17 | The expected outputs of both Shell and Python commands will be shown without 18 | any prompt: 19 | ```{code-block} console 20 | Hello world! 21 | ``` 22 | ::: 23 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | html[data-theme=dark] { 2 | --pst-color-primary: #04B46D; 3 | --pst-color-link: var(--pst-color-primary); 4 | } 5 | 6 | html[data-theme=light] { 7 | --pst-color-primary: #03A062; 8 | --pst-color-link: var(--pst-color-primary); 9 | } 10 | 11 | .sponsor { 12 | height: 100px; 13 | padding-top: 5px; 14 | padding-right: 5px; 15 | padding-bottom: 5px; 16 | padding-left: 5px; 17 | } 18 | 19 | .img-sponsor { 20 | height: 50px; 21 | padding-top: 5px; 22 | padding-right: 5px; 23 | padding-bottom: 5px; 24 | padding-left: 5px; 25 | } 26 | 27 | .things-in-a-row { 28 | display: flex; 29 | flex-wrap: wrap; 30 | justify-content: space-between; 31 | } 32 | -------------------------------------------------------------------------------- /docs/source/_static/dark-logo-gatsby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/dark-logo-gatsby.png -------------------------------------------------------------------------------- /docs/source/_static/dark-logo-swc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/dark-logo-swc.png -------------------------------------------------------------------------------- /docs/source/_static/dark-logo-ucl.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/dark-logo-ucl.png -------------------------------------------------------------------------------- /docs/source/_static/gin-clipboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/gin-clipboard.png -------------------------------------------------------------------------------- /docs/source/_static/gin-privacy-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/gin-privacy-settings.png -------------------------------------------------------------------------------- /docs/source/_static/light-logo-gatsby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/light-logo-gatsby.png -------------------------------------------------------------------------------- /docs/source/_static/light-logo-swc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/light-logo-swc.png -------------------------------------------------------------------------------- /docs/source/_static/light-logo-ucl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/light-logo-ucl.png -------------------------------------------------------------------------------- /docs/source/_static/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/logo_dark.png -------------------------------------------------------------------------------- /docs/source/_static/logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/logo_light.png -------------------------------------------------------------------------------- /docs/source/_static/mermaid_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "theme": "neutral", 3 | "themeVariables": { 4 | "fontFamily": "arial" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /docs/source/_static/ssh_flowchart_full.mmd: -------------------------------------------------------------------------------- 1 | flowchart LR 2 | classDef safe stroke:#03A062,stroke-width:3px; 3 | classDef unsafe stroke:#d95f02,stroke-width:3px; 4 | classDef emphasis color:#03A062,stroke:#03A062,stroke-width:3px; 5 | 6 | unmanaged("💻
<br>Unmanaged<br>Computer") -->|ssh| bastion(("Bastion Node<br>ssh.swc.ucl.ac.uk")) 7 | 8 | subgraph trusted["SWC Network Domain"] 9 | 10 | bastion --> |ssh| gateway(("Gateway Node<br>hpc-gw2")) 11 | managed_win("💻<br>Managed<br>Windows<br>Desktop") ---> |ssh| gateway 12 | managed_linux("💻<br>Managed<br>Linux<br>Desktop") ---> |ssh| gateway 13 | bastion ----> |srun<br>sbatch| slurm{"SLURM 🚦"} 14 | gateway --> |srun<br>sbatch| slurm 15 | managed_linux --> |srun<br>
sbatch| slurm 16 | 17 | subgraph compute["Compute Nodes"] 18 | node1(("Node 1")) 19 | node2(("Node 2")) 20 | node3(("Node 3")) 21 | end 22 | 23 | slurm --> node1 24 | slurm --> node2 25 | slurm --> node3 26 | 27 | end 28 | 29 | class bastion emphasis 30 | class gateway emphasis 31 | class unmanaged unsafe 32 | class managed_win safe 33 | class managed_linux safe 34 | class node1 safe 35 | class node2 safe 36 | class node3 safe 37 | -------------------------------------------------------------------------------- /docs/source/_static/ssh_flowchart_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/ssh_flowchart_full.png -------------------------------------------------------------------------------- /docs/source/_static/ssh_flowchart_unmanaged.mmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/ssh_flowchart_unmanaged.mmd -------------------------------------------------------------------------------- /docs/source/_static/ssh_flowchart_unmanaged.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuroinformatics-unit/HowTo/c8534cc450a8a89cd72c0147fdfbbd2169b42b86/docs/source/_static/ssh_flowchart_unmanaged.png -------------------------------------------------------------------------------- /docs/source/_static/swc-wiki-warning.md: -------------------------------------------------------------------------------- 1 | :::{warning} 2 | Some links within this document point to the 3 | [SWC internal wiki](https://wiki.ucl.ac.uk/display/SI/SWC+Intranet), 4 | which is only accessible from within the SWC network. 5 | We recommend opening these links in a new tab. 6 | ::: 7 | -------------------------------------------------------------------------------- /docs/source/_templates/footer_end.html: -------------------------------------------------------------------------------- 1 | 15 | -------------------------------------------------------------------------------- /docs/source/_templates/footer_start.html: -------------------------------------------------------------------------------- 1 |

2 | {% trans sphinx_version=sphinx_version|e %}Created using Sphinx {{ sphinx_version }}.{% endtrans %} 3 |
4 |

5 |

6 | {{ _("Built with the") }} 7 | PyData Sphinx Theme 8 | {{ theme_version }}. 9 |

10 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = "HowTo" 21 | copyright = "2022, Neuroinformatics Unit" 22 | author = "Neuroinformatics Unit" 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = "0.0.1" 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx.ext.githubpages", 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.autosummary", 37 | "sphinx.ext.viewcode", 38 | "sphinx.ext.intersphinx", 39 | "sphinx.ext.napoleon", 40 | "sphinx_design", 41 | "sphinx_copybutton", 42 | "myst_parser", 43 | "numpydoc", 44 | "nbsphinx", 45 | "sphinx_gallery.gen_gallery", 46 | ] 47 | 48 | # Configure the myst parser to enable cool markdown features 49 | myst_enable_extensions = [ 50 | "amsmath", 51 | "colon_fence", 52 | "deflist", 53 | "dollarmath", 54 | "fieldlist", 55 | "html_admonition", 56 | "html_image", 57 | "linkify", 58 | "replacements", 59 | "smartquotes", 60 | "strikethrough", 61 | "substitution", 62 | "tasklist", 63 | ] 64 | # Automatically add anchors to markdown headings 65 | myst_heading_anchors = 3 66 | 67 | # Add any paths that contain templates here, relative to this directory. 68 | templates_path = ["_templates"] 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | # This pattern also affects html_static_path and html_extra_path. 
73 | exclude_patterns = [ 74 | "**.ipynb_checkpoints", 75 | # to ensure that include files (partial pages) aren't built, exclude them 76 | # https://github.com/sphinx-doc/sphinx/issues/1965#issuecomment-124732907 77 | "**/includes/**", 78 | # exclude .py and .ipynb files in auto_examples generated by sphinx-gallery 79 | # this is to prevent sphinx from complaining about duplicate source files 80 | "electrophysiology/example_pipelines/auto_examples_ephys/*.ipynb", 81 | "electrophysiology/example_pipelines/auto_examples_ephys/*.py", 82 | ] 83 | 84 | # Ignore certain URLs from being checked 85 | linkcheck_ignore = [ 86 | "https://neuromorpho.org/", 87 | "https://wiki.ucl.ac.uk/", # ignore everything on the internal wiki 88 | "https://linux.die.net/man/1/rsync", 89 | "https://www.uclb.com/", 90 | "https://support.zadarastorage.com", 91 | "https://stackoverflow.com/questions/24447047/remove-git-annex-repository-from-file-tree", 92 | "https://joinup.ec.europa.eu/collection/eupl/solution/joinup-licensing-assistant/jla-compatibility-checker", 93 | "http://ccdb.ucsd.edu/home", 94 | "https://opensource.org/licenses/BSD-3-Clause", 95 | ] 96 | # The linkcheck builder will skip verifying that anchors exist when checking 97 | # these URLs (e.g. because they are generated by JavaScript). 98 | linkcheck_anchors_ignore_for_url = [ 99 | "https://gin.g-node.org/G-Node/Info/wiki", 100 | "https://gin.g-node.org/G-Node/info/wiki", # ignore both spellings 101 | ] 102 | linkcheck_retries = 2 103 | 104 | # -- Options for HTML output ------------------------------------------------- 105 | 106 | # The theme to use for HTML and HTML Help pages. See the documentation for 107 | # a list of builtin themes. 108 | # 109 | html_theme = "pydata_sphinx_theme" 110 | html_title = "HowTo" 111 | 112 | # Redirect the webpage to another URL 113 | # Sphinx will create the appropriate CNAME file in the build directory 114 | # https://www.sphinx-doc.org/en/master/usage/extensions/githubpages.html 115 | html_baseurl = "https://howto.neuroinformatics.dev/" 116 | 117 | # Add any paths that contain custom static files (such as style sheets) here, 118 | # relative to this directory. They are copied after the builtin static files, 119 | # so a file named "default.css" will overwrite the builtin "default.css". 
120 | html_static_path = ["_static"] 121 | 122 | html_css_files = [ 123 | "css/custom.css", 124 | ] 125 | 126 | html_favicon = "_static/logo_light.png" 127 | 128 | ## Customize the theme 129 | html_theme_options = { 130 | "icon_links": [ 131 | { 132 | # Label for this link 133 | "name": "GitHub", 134 | # URL where the link will redirect 135 | "url": "https://github.com/neuroinformatics-unit/HowTo", # required 136 | # Icon class (if "type": "fontawesome"), or path to local image (if "type": "local") 137 | "icon": "fa-brands fa-github", 138 | # The type of image to be used (see below for details) 139 | "type": "fontawesome", 140 | } 141 | ], 142 | "logo": { 143 | "text": "HowTo", 144 | "image_light": "logo_light.png", 145 | "image_dark": "logo_dark.png", 146 | }, 147 | "footer_start": ["footer_start"], 148 | "footer_end": ["footer_end"], 149 | "show_prev_next": False 150 | } 151 | 152 | # Hide the "Show Source" button 153 | html_show_sourcelink = False 154 | 155 | # Configure the code block copy button 156 | # don't copy line numbers, prompts, or console outputs 157 | copybutton_exclude = ".linenos, .gp, .go" 158 | 159 | # Configure sphinx-gallery 160 | sphinx_gallery_conf = { 161 | 'examples_dirs': ["electrophysiology/example_pipelines/examples"], # path to your example scripts 162 | 'gallery_dirs': ["electrophysiology/example_pipelines/auto_examples_ephys"], # path to where to save gallery generated output 163 | } 164 | -------------------------------------------------------------------------------- /docs/source/data_analysis/HPC-module-SLEAP.md: -------------------------------------------------------------------------------- 1 | # Use the SLEAP module on the SWC HPC cluster 2 | 3 | ```{include} ../_static/swc-wiki-warning.md 4 | ``` 5 | 6 | ```{include} ../_static/code-blocks-note.md 7 | ``` 8 | 9 | ## Abbreviations 10 | | Acronym | Meaning | 11 | | --------------------------------------------------------------- | -------------------------------------------- | 12 | | [SLEAP](https://sleap.ai/) | Social LEAP Estimates Animal Poses | 13 | | [SWC](https://www.sainsburywellcome.org/web/) | Sainsbury Wellcome Centre | 14 | | [HPC](https://en.wikipedia.org/wiki/High-performance_computing) | High Performance Computing | 15 | | [IT](https://en.wikipedia.org/wiki/Information_technology) | Information Technology | 16 | | [GUI](https://en.wikipedia.org/wiki/Graphical_user_interface) | Graphical User Interface | 17 | | [SLURM](https://slurm.schedmd.com/) | Simple Linux Utility for Resource Management | 18 | 19 | ## Prerequisites 20 | 21 | ::: {dropdown} Note on managed Linux desktops 22 | :color: info 23 | :icon: info 24 | 25 | The SWC's IT team offers managed desktop computers equipped with a Linux image. These machines are already part of SWC's trusted domain and have direct access to SLURM, the HPC modules, and the SWC filesystem. 26 | 27 | If you have access to one of these desktops, 28 | you can skip the pre-requisite steps. 29 | You may simply open a terminal, type `module load SLEAP`, 30 | and start using SLEAP directly, as you would on any local 31 | Linux machine. All SLEAP commands should work as expected, 32 | including `sleap-label` for launching the GUI. 33 | 34 | That said, you may still want to offload GPU-intensive tasks to an HPC node (e.g. because the desktop's GPU is not powerful enough or because you need to run many jobs in parallel). In that case, you may 35 | still want to read the sections on [model training](sleap-training) 36 | and [inference](sleap-inference). 
37 | ::: 38 | 39 | (access-to-the-hpc-cluster)= 40 | ### Access to the HPC cluster 41 | Verify that you can access HPC gateway node (typing your `` both times when prompted): 42 | ```{code-block} console 43 | $ ssh @ssh.swc.ucl.ac.uk 44 | $ ssh hpc-gw2 45 | ``` 46 | To learn more about accessing the HPC via SSH, see the [relevant how-to guide](ssh-cluster-target). 47 | 48 | ### Access to the SLEAP module 49 | Once you are on the HPC gateway node, SLEAP should be listed among the available modules when you run `module avail`: 50 | 51 | ```{code-block} console 52 | $ module avail 53 | ... 54 | SLEAP/2023-03-13 55 | SLEAP/2023-08-01 56 | SLEAP/2024-08-14 57 | ... 58 | ``` 59 | - `SLEAP/2023-03-13` corresponds to `SLEAP v.1.2.9` 60 | - `SLEAP/2023-08-01` corresponds to `SLEAP v.1.3.1` 61 | - `SLEAP/2024-08-14` corresponds to `SLEAP v.1.3.3` 62 | 63 | We recommend always using the latest version, which is the one loaded by default 64 | when you run `module load SLEAP`. If you want to load a specific version, 65 | you can do so by typing the full module name, 66 | including the date e.g. `module load SLEAP/2023-08-01`. 67 | 68 | If a module has been successfully loaded, it will be listed when you run `module list`, 69 | along with other modules it may depend on: 70 | 71 | ```{code-block} console 72 | $ module list 73 | Currently Loaded Modulefiles: 74 | 1) cuda/11.8 2) SLEAP/2023-08-01 75 | ``` 76 | 77 | If you have troubles with loading the SLEAP module, 78 | see this guide's [Troubleshooting section](#problems-with-the-sleap-module). 79 | 80 | 81 | ### Install SLEAP on your local PC/laptop 82 | While you can delegate the GPU-intensive work to the HPC cluster, 83 | you will need to use the SLEAP GUI for some steps, such as labelling frames. 84 | Thus, you also need to install SLEAP on your local PC/laptop. 85 | 86 | We recommend following the official [SLEAP installation guide](https://sleap.ai/installation.html). 87 | To minimise the risk of issues due to incompatibilities between versions, ensure the version of your local installation of SLEAP matches the one you plan to load in the cluster. 88 | 89 | ### Mount the SWC filesystem on your local PC/laptop 90 | The rest of this guide assumes that you have mounted the SWC filesystem on your local PC/laptop. 91 | If you have not done so, please follow the relevant instructions on the 92 | [SWC internal wiki](https://wiki.ucl.ac.uk/display/SSC/SWC+Storage+Platform+Overview). 93 | 94 | We will also assume that the data you are working with are stored in a `ceph` 95 | directory to which you have access to. In the rest of this guide, we will use the path 96 | `/ceph/scratch/neuroinformatics-dropoff/SLEAP_HPC_test_data` which contains a SLEAP project 97 | for test purposes. You should replace this with the path to your own data. 98 | 99 | :::{dropdown} Data storage location matters 100 | :color: warning 101 | :icon: alert-fill 102 | 103 | The cluster has fast access to data stored on the `ceph` filesystem, so if your 104 | data is stored elsewhere, make sure to transfer it to `ceph` before running the job. 105 | You can use tools such as [`rsync`](https://linux.die.net/man/1/rsync) 106 | to copy data from your local machine to `ceph` via an ssh connection. 
For example: 107 | 108 | ```{code-block} console 109 | $ rsync -avz @ssh.swc.ucl.ac.uk:/ceph/scratch/neuroinformatics-dropoff/SLEAP_HPC_test_data 110 | ``` 111 | ::: 112 | 113 | (sleap-training)= 114 | ## Model training 115 | This will consist of two parts: [preparing a training job](prepare-the-training-job) 116 | (on your local SLEAP installation) and [running a training job](run-the-training-job) 117 | (on the HPC cluster's SLEAP module). Some evaluation metrics for the trained models 118 | can be [viewed via the SLEAP GUI](model-evaluation) on your local SLEAP installation. 119 | 120 | (prepare-the-training-job)= 121 | ### Prepare the training job 122 | Follow the SLEAP instructions for [Creating a Project](https://sleap.ai/tutorials/new-project.html) 123 | and [Initial Labelling](https://sleap.ai/tutorials/initial-labeling.html). 124 | Ensure that the project file (e.g. `labels.v001.slp`) is saved in the mounted SWC filesystem 125 | (as opposed to your local filesystem). 126 | 127 | Next, follow the instructions in [Remote Training](https://sleap.ai/guides/remote.html#remote-training), 128 | i.e. *Predict* -> *Run Training…* -> *Export Training Job Package…*. 129 | - For selecting the right configuration parameters, see [Configuring Models](https://sleap.ai/guides/choosing-models.html#) and [Troubleshooting Workflows](https://sleap.ai/guides/troubleshooting-workflows.html) 130 | - Set the *Predict On* parameter to *nothing*. Remote training and inference (prediction) are easiest to run separately on the HPC Cluster. Also unselect *Visualize Predictions During Training* in training settings, if it's enabled by default. 131 | - If you are working with camera view from above or below (as opposed to a side view), set the *Rotation Min Angle* and *Rotation Max Angle* to -180 and 180 respectively in the *Augmentation* section. 132 | - Make sure to save the exported training job package (e.g. `labels.v001.slp.training_job.zip`) in the mounted SWC filesystem, for example, in the same directory as the project file. 133 | - Unzip the training job package. This will create a folder with the same name (minus the `.zip` extension). This folder contains everything needed to run the training job on the HPC cluster. 134 | 135 | (run-the-training-job)= 136 | ### Run the training job 137 | Login to the HPC cluster as described above. 138 | ```{code-block} console 139 | $ ssh @ssh.swc.ucl.ac.uk 140 | $ ssh hpc-gw2 141 | ``` 142 | Navigate to the training job folder (replace with your own path) and list its contents: 143 | ```{code-block} console 144 | :emphasize-lines: 12 145 | $ cd /ceph/scratch/neuroinformatics-dropoff/SLEAP_HPC_test_data 146 | $ cd labels.v001.slp.training_job 147 | $ ls -1 148 | centered_instance.json 149 | centroid.json 150 | inference-script.sh 151 | jobs.yaml 152 | labels.v001.pkg.slp 153 | labels.v001.slp.predictions.slp 154 | train_slurm.sh 155 | swc-hpc-pose-estimation 156 | train-script.sh 157 | ``` 158 | There should be a `train-script.sh` file created by SLEAP, which already contains the 159 | commands to run the training. You can see the contents of the file by running `cat train-script.sh`: 160 | ```{code-block} bash 161 | :caption: labels.v001.slp.training_job/train-script.sh 162 | :name: train-script-sh 163 | :linenos: 164 | #!/bin/bash 165 | sleap-train centroid.json labels.v001.pkg.slp 166 | sleap-train centered_instance.json labels.v001.pkg.slp 167 | ``` 168 | The precise commands will depend on the model configuration you chose in SLEAP. 
169 | Here we see two separate training calls, one for the 'centroid' and another for 170 | the 'centered_instance' model. That's because in this example we have chosen 171 | the ['Top-Down'](https://sleap.ai/tutorials/initial-training.html#training-options) 172 | configuration, which consists of two neural networks - the first for isolating 173 | the animal instances (by finding their centroids) and the second for predicting 174 | all the body parts per instance. 175 | 176 | ![Top-Down model configuration](https://sleap.ai/_images/topdown_approach.jpg) 177 | 178 | :::{dropdown} More on 'Top-Down' vs 'Bottom-Up' models 179 | :color: info 180 | :icon: info 181 | 182 | Although the 'Top-Down' configuration was designed with multiple animals in mind, 183 | it can also be used for single-animal videos. It makes sense to use it for videos 184 | where the animal occupies a relatively small portion of the frame - see 185 | [Troubleshooting Workflows](https://sleap.ai/guides/troubleshooting-workflows.html) for more info. 186 | ::: 187 | 188 | Next you need to create a SLURM batch script, which will schedule the training job 189 | on the HPC cluster. Create a new file called `train_slurm.sh` 190 | (you can do this in the terminal with `nano`/`vim` or in a text editor of 191 | your choice on your local PC/laptop). Here we create the script in the same folder 192 | as the training job, but you can save it anywhere you want, or even keep track of it with `git`. 193 | 194 | ```{code-block} console 195 | $ nano train_slurm.sh 196 | ``` 197 | 198 | An example is provided below, followed by explanations. 199 | ```{code-block} bash 200 | :caption: train_slurm.sh 201 | :name: train-slurm-sh 202 | :linenos: 203 | #!/bin/bash 204 | 205 | #SBATCH -J slp_train # job name 206 | #SBATCH -p gpu # partition (queue) 207 | #SBATCH -N 1 # number of nodes 208 | #SBATCH --mem 32G # memory pool for all cores 209 | #SBATCH -n 8 # number of cores 210 | #SBATCH -t 0-06:00 # time (D-HH:MM) 211 | #SBATCH --gres gpu:1 # request 1 GPU (of any kind) 212 | #SBATCH -o slurm.%x.%N.%j.out # STDOUT 213 | #SBATCH -e slurm.%x.%N.%j.err # STDERR 214 | #SBATCH --mail-type=ALL 215 | #SBATCH --mail-user=user@domain.com 216 | 217 | # Load the SLEAP module 218 | module load SLEAP 219 | 220 | # Define directories for SLEAP project and exported training job 221 | SLP_DIR=/ceph/scratch/neuroinformatics-dropoff/SLEAP_HPC_test_data 222 | SLP_JOB_NAME=labels.v001.slp.training_job 223 | SLP_JOB_DIR=$SLP_DIR/$SLP_JOB_NAME 224 | 225 | # Go to the job directory 226 | cd $SLP_JOB_DIR 227 | 228 | # Run the training script generated by SLEAP 229 | ./train-script.sh 230 | ``` 231 | 232 | In `nano`, you can save the file by pressing `Ctrl+O` and exit by pressing `Ctrl+X`. 233 | 234 | :::{dropdown} Explanation of the batch script 235 | :color: info 236 | :icon: info 237 | - The `#SBATCH` lines are SLURM directives. They specify the resources needed 238 | for the job, such as the number of nodes, CPUs, memory, etc. 239 | A primer on the most useful SLURM arguments is provided in this [how-to guide](slurm-arguments-target). 240 | For more information see the [SLURM documentation](https://slurm.schedmd.com/sbatch.html). 241 | 242 | - The `#` lines are comments. They are not executed by SLURM, but they are useful 243 | for explaining the script to your future self and others. 244 | 245 | - The `module load SLEAP` line loads the latest SLEAP module and any other modules 246 | it may depend on. 
247 | 248 | - The `cd` line changes the working directory to the training job folder. 249 | This is necessary because the `train-script.sh` file contains relative paths 250 | to the model configuration and the project file. 251 | 252 | - The `./train-script.sh` line runs the training job (executes the contained commands). 253 | ::: 254 | 255 | :::{warning} 256 | Before submitting the job, ensure that you have permissions to execute 257 | both the batch script and the training script generated by SLEAP. 258 | You can make these files executable by running in the terminal: 259 | 260 | ```{code-block} console 261 | $ chmod +x train-script.sh 262 | $ chmod +x train_slurm.sh 263 | ``` 264 | 265 | If the scripts are not in your working directory, you will need to specify their full paths: 266 | 267 | ```{code-block} console 268 | $ chmod +x /path/to/train-script.sh 269 | $ chmod +x /path/to/train_slurm.sh 270 | ``` 271 | ::: 272 | 273 | Now you can submit the batch script via running the following command 274 | (in the same directory as the script): 275 | ```{code-block} console 276 | $ sbatch train_slurm.sh 277 | Submitted batch job 3445652 278 | ``` 279 | 280 | You may monitor the progress of the job in various ways: 281 | 282 | ::::{tab-set} 283 | 284 | :::{tab-item} squeue 285 | 286 | View the status of the queued/running jobs with [`squeue`](https://slurm.schedmd.com/squeue.html): 287 | 288 | ```{code-block} console 289 | $ squeue --me 290 | JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) 291 | 3445652 gpu slp_train sirmpila R 23:11 1 gpu-sr670-20 292 | ``` 293 | ::: 294 | 295 | :::{tab-item} sacct 296 | 297 | View status of running/completed jobs with [`sacct`](https://slurm.schedmd.com/sacct.html): 298 | 299 | ```{code-block} console 300 | $ sacct 301 | JobID JobName Partition Account AllocCPUS State ExitCode 302 | ------------ ---------- ---------- ---------- ---------- ---------- -------- 303 | 3445652 slp_train gpu swc-ac 2 COMPLETED 0:0 304 | 3445652.bat+ batch swc-ac 2 COMPLETED 0:0 305 | ``` 306 | Run `sacct` with some more helpful arguments. 
307 | For example, you can view jobs from the last 24 hours, displaying the time 308 | elapsed and the peak memory usage in KB (MaxRSS): 309 | 310 | ```{code-block} console 311 | $ sacct \ 312 | --starttime $(date -d '24 hours ago' +%Y-%m-%dT%H:%M:%S) \ 313 | --endtime $(date +%Y-%m-%dT%H:%M:%S) \ 314 | --format=JobID,JobName,Partition,State,Start,Elapsed,MaxRSS 315 | 316 | JobID JobName Partition State Start Elapsed MaxRSS 317 | ------------ ---------- ---------- ---------- ------------------- ---------- ---------- 318 | 4043595 slp_infer gpu FAILED 2023-10-10T18:14:31 00:00:35 319 | 4043595.bat+ batch FAILED 2023-10-10T18:14:31 00:00:35 271104K 320 | 4043603 slp_infer gpu FAILED 2023-10-10T18:27:32 00:01:37 321 | 4043603.bat+ batch FAILED 2023-10-10T18:27:32 00:01:37 423476K 322 | 4043611 slp_infer gpu PENDING Unknown 00:00:00 323 | ``` 324 | ::: 325 | 326 | :::{tab-item} view the logs 327 | 328 | View the contents of standard output and error 329 | (the node name and job ID will differ in each case): 330 | ```{code-block} console 331 | $ cat slurm.gpu-sr670-20.3445652.out 332 | $ cat slurm.gpu-sr670-20.3445652.err 333 | ``` 334 | ::: 335 | 336 | :::: 337 | 338 | ```{dropdown} Out-of-memory (OOM) errors 339 | :color: warning 340 | :icon: alert-fill 341 | 342 | If you encounter out-of-memory errors, keep in mind that there two main sources of memory usage: 343 | - CPU memory (RAM), specified via the `--mem` argument in the SLURM batch script. This is the memory used by the Python process running the training job and is shared among all the CPU cores. 344 | - GPU memory, this is the memory used by the GPU card(s) and depends on the GPU card type you requested via the `--gres gpu:1` argument in the SLURM batch script. To increase it, you can request a specific GPU card type with more GPU memory (e.g. `--gres gpu:a4500:1`). The SWC wiki provides a [list of all GPU card types and their specifications](https://wiki.ucl.ac.uk/display/SSC/CPU+and+GPU+Platform+architecture). 345 | - If requesting more memory doesn't help, you can try reducing the size of your SLEAP models. You may tweak the model backbone architecture, or play with *Input scaling*, *Max stride* and *Batch size*. See SLEAP's [documentation](https://sleap.ai/) and [discussion forum](https://github.com/talmolab/sleap/discussions) for more details. 346 | ``` 347 | 348 | (model-evaluation)= 349 | ## Model evaluation 350 | Upon successful completion of the training job, a `models` folder will have 351 | been created in the training job directory. It contains one subfolder per 352 | training run (by default prefixed with the date and time of the run). 353 | 354 | ```{code-block} console 355 | $ cd /ceph/scratch/neuroinformatics-dropoff/SLEAP_HPC_test_data 356 | $ cd labels.v001.slp.training_job 357 | $ cd models 358 | $ ls -1 359 | 230509_141357.centered_instance 360 | 230509_141357.centroid 361 | ``` 362 | 363 | Each subfolder holds the trained model files (e.g. `best_model.h5`), 364 | their configurations (`training_config.json`) and some evaluation metrics. 365 | 366 | ```{code-block} console 367 | $ cd 230509_141357.centered_instance 368 | $ ls -1 369 | best_model.h5 370 | initial_config.json 371 | labels_gt.train.slp 372 | labels_gt.val.slp 373 | labels_pr.train.slp 374 | labels_pr.val.slp 375 | metrics.train.npz 376 | metrics.val.npz 377 | training_config.json 378 | training_log.csv 379 | ``` 380 | The SLEAP GUI on your local machine can be used to quickly evaluate the trained models. 
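If you prefer to take a quick look at the metrics directly on the cluster (for example, before transferring anything to your local machine), the `metrics.val.npz` files can be opened with NumPy. The snippet below is a minimal sketch, assuming you run it from the model folder shown above with the SLEAP module loaded (which provides NumPy); the exact metric names stored in the archive can vary between SLEAP versions, so here we simply list them:

```{code-block} python
import numpy as np

# Validation-split metrics saved by SLEAP for this training run
metrics = np.load("metrics.val.npz", allow_pickle=True)

# List the names of the stored metrics, then inspect any that look relevant
print(metrics.files)
```

To view the metrics in the SLEAP GUI instead: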
381 | 382 | - Select *Predict* -> *Evaluation Metrics for Trained Models...* 383 | - Click on *Add Trained Models(s)* and select the folder containing the model(s) you want to evaluate. 384 | - You can view the basic metrics on the shown table or you can also view a more detailed report (including plots) by clicking *View Metrics*. 385 | 386 | For more detailed evaluation metrics, you can refer to [SLEAP's model evaluation notebook](https://sleap.ai/notebooks/Model_evaluation.html). 387 | 388 | (sleap-inference)= 389 | ## Model inference 390 | By inference, we mean using a trained model to predict the labels on new frames/videos. 391 | SLEAP provides the [`sleap-track`](https://sleap.ai/guides/cli.html?#inference-and-tracking) command line utility for running inference 392 | on a single video or a folder of videos. 393 | 394 | Below is an example SLURM batch script that contains a `sleap-track` call. 395 | ```{code-block} bash 396 | :caption: infer_slurm.sh 397 | :name: infer-slurm-sh 398 | :linenos: 399 | #!/bin/bash 400 | 401 | #SBATCH -J slp_infer # job name 402 | #SBATCH -p gpu # partition 403 | #SBATCH -N 1 # number of nodes 404 | #SBATCH --mem 64G # memory pool for all cores 405 | #SBATCH -n 16 # number of cores 406 | #SBATCH -t 0-02:00 # time (D-HH:MM) 407 | #SBATCH --gres gpu:rtx5000:1 # request 1 GPU (of a specific kind) 408 | #SBATCH -o slurm.%x.%N.%j.out # write STDOUT 409 | #SBATCH -e slurm.%x.%N.%j.err # write STDERR 410 | #SBATCH --mail-type=ALL 411 | #SBATCH --mail-user=user@domain.com 412 | 413 | # Load the SLEAP module 414 | module load SLEAP 415 | 416 | # Define directories for SLEAP project and exported training job 417 | SLP_DIR=/ceph/scratch/neuroinformatics-dropoff/SLEAP_HPC_test_data 418 | VIDEO_DIR=$SLP_DIR/videos 419 | SLP_JOB_NAME=labels.v001.slp.training_job 420 | SLP_JOB_DIR=$SLP_DIR/$SLP_JOB_NAME 421 | 422 | # Go to the job directory 423 | cd $SLP_JOB_DIR 424 | # Make a directory to store the predictions 425 | mkdir -p predictions 426 | 427 | # Run the inference command 428 | sleap-track $VIDEO_DIR/M708149_EPM_20200317_165049331-converted.mp4 \ 429 | -m $SLP_JOB_DIR/models/231010_164307.centroid/training_config.json \ 430 | -m $SLP_JOB_DIR/models/231010_164307.centered_instance/training_config.json \ 431 | --gpu auto \ 432 | --tracking.tracker simple \ 433 | --tracking.similarity centroid \ 434 | --tracking.post_connect_single_breaks 1 \ 435 | -o predictions/labels.v001.slp.predictions.slp \ 436 | --verbosity json \ 437 | --no-empty-frames 438 | ``` 439 | The script is very similar to the training script, with the following differences: 440 | - The time limit `-t` is set lower, since inference is normally faster than training. This will however depend on the size of the video and the number of models used. 441 | - The requested number of cores `n` and memory `--mem` are higher. This will depend on the requirements of the specific job you are running. It's best practice to try with a scaled-down version of your data first, to get an idea of the resources needed. 442 | - The requested GPU is of a specific kind (RTX 5000). This will again depend on the requirements of your job, as the different GPU kinds vary in GPU memory size and compute capabilities (see [the SWC wiki](https://wiki.ucl.ac.uk/display/SSC/CPU+and+GPU+Platform+architecture)). 443 | - The `./train-script.sh` line is replaced by the `sleap-track` command. 444 | - The `\` character is used to split the long `sleap-track` command into multiple lines for readability. 
It is not necessary if the command is written on a single line. 445 | 446 | ::: {dropdown} Explanation of the sleap-track arguments 447 | :color: info 448 | :icon: info 449 | 450 | Some important command line arguments are explained below. 451 | You can view a full list of the available arguments by running `sleap-track --help`. 452 | - The first argument is the path to the video file to be processed. 453 | - The `-m` option is used to specify the path to the model configuration file(s) to be used for inference. In this example we use the two models that were trained above. 454 | - The `--gpu` option is used to specify the GPU to be used for inference. The `auto` value will automatically select the GPU with the highest percentage of available memory (of the GPUs that are available on the machine/node) 455 | - The options starting with `--tracking` specify parameters used for tracking the detected instances (animals) across frames. See SLEAP's guide on [tracking methods](https://sleap.ai/guides/proofreading.html#tracking-method-details) for more info. 456 | - The `-o` option is used to specify the path to the output file containing the predictions. 457 | - The above script will predict all the frames in the video. You may select specific frames via the `--frames` option. For example: `--frames 1-50` or `--frames 1,3,5,7,9`. 458 | ::: 459 | 460 | You can submit and monitor the inference job in the same way as the training job. 461 | ```{code-block} console 462 | $ sbatch infer_slurm.sh 463 | $ squeue --me 464 | ``` 465 | Upon completion, a `labels.v001.slp.predictions.slp` file will have been created in the job directory. 466 | 467 | You can use the SLEAP GUI on your local machine to load and view the predictions: 468 | *File* -> *Open Project...* -> select the `labels.v001.slp.predictions.slp` file. 469 | 470 | ## The training-inference cycle 471 | Now that you have some predictions, you can keep improving your models by repeating 472 | the training-inference cycle. The basic steps are: 473 | - Manually correct some of the predictions: see [Prediction-assisted labeling](https://sleap.ai/tutorials/assisted-labeling.html) 474 | - Merge corrected labels into the initial training set: see [Merging guide](https://sleap.ai/guides/merging.html) 475 | - Save the merged training set as `labels.v002.slp` 476 | - Export a new training job `labels.v002.slp.training_job` (you may reuse the training configurations from `v001`) 477 | - Repeat the training-inference cycle until satisfied 478 | 479 | ## Troubleshooting 480 | 481 | ### Problems with the SLEAP module 482 | 483 | In this section, we will describe how to test that the SLEAP module is loaded 484 | correctly for you and that it can use the available GPUs. 485 | 486 | Login to the HPC cluster as described [above](access-to-the-hpc-cluster). 487 | 488 | Start an interactive job on a GPU node. This step is necessary, because we need 489 | to test the module's access to the GPU. 490 | ```{code-block} console 491 | $ srun -p gpu --gres=gpu:1 --pty bash -i 492 | ``` 493 | :::{dropdown} Explain the above command 494 | :color: info 495 | :icon: info 496 | 497 | * `-p gpu` requests a node from the 'gpu' partition (queue) 498 | * `--gres=gpu:1` requests 1 GPU of any kind 499 | * `--pty` is short for 'pseudo-terminal' 500 | * The `-i` stands for 'interactive' 501 | 502 | Taken together, the above command will start an interactive bash terminal session 503 | on a node of the 'gpu' partition, equipped with 1 GPU card. 
504 | ::: 505 | 506 | First, let's verify that you are indeed on a node equipped with a functional 507 | GPU, by typing `nvidia-smi`: 508 | ```{code-block} console 509 | $ nvidia-smi 510 | Wed Sep 27 10:34:35 2023 511 | +-----------------------------------------------------------------------------+ 512 | | NVIDIA-SMI 525.125.06 Driver Version: 525.125.06 CUDA Version: 12.0 | 513 | |-------------------------------+----------------------+----------------------+ 514 | | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | 515 | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | 516 | | | | MIG M. | 517 | |===============================+======================+======================| 518 | | 0 NVIDIA GeForce ... Off | 00000000:41:00.0 Off | N/A | 519 | | 0% 42C P8 22W / 240W | 1MiB / 8192MiB | 0% Default | 520 | | | | N/A | 521 | +-------------------------------+----------------------+----------------------+ 522 | 523 | +-----------------------------------------------------------------------------+ 524 | | Processes: | 525 | | GPU GI CI PID Type Process name GPU Memory | 526 | | ID ID Usage | 527 | |=============================================================================| 528 | | No running processes found | 529 | +-----------------------------------------------------------------------------+ 530 | ``` 531 | Your output should look similar to the above. You will be able to see the GPU 532 | name, temperature, memory usage, etc. If you see an error message instead, 533 | (even though you are on a GPU node) please contact the SWC Scientific Computing team. 534 | 535 | Next, load the SLEAP module. 536 | ```{code-block} console 537 | $ module load SLEAP 538 | Loading SLEAP/2024-08-14 539 | Loading requirement: cuda/11.8 540 | ``` 541 | 542 | To verify that the module was loaded successfully: 543 | ```{code-block} console 544 | $ module list 545 | Currently Loaded Modulefiles: 546 | 1) SLEAP/2024-08-14 547 | ``` 548 | You can essentially think of the module as a centrally installed conda environment. 549 | When it is loaded, you should be using a particular Python executable. 550 | You can verify this by running: 551 | 552 | ```{code-block} console 553 | $ which python 554 | /ceph/apps/ubuntu-20/packages/SLEAP/2024-08-14/bin/python 555 | ``` 556 | 557 | Finally we will verify that the `sleap` python package can be imported and can 558 | 'see' the GPU. We will mostly just follow the 559 | [relevant SLEAP instructions](https://sleap.ai/installation.html#testing-that-things-are-working). 560 | First, start a Python interpreter: 561 | ```{code-block} console 562 | $ python 563 | ``` 564 | Next, run the following Python commands: 565 | 566 | ::: {warning} 567 | The `import sleap` command may take some time to run (more than a minute). 568 | This is normal. Subsequent imports should be faster. 
569 | ::: 570 | 571 | ```{code-block} pycon 572 | >>> import sleap 573 | 574 | >>> sleap.versions() 575 | SLEAP: 1.3.3 576 | TensorFlow: 2.8.4 577 | Numpy: 1.21.6 578 | Python: 3.7.12 579 | OS: Linux-5.4.0-109-generic-x86_64-with-debian-bullseye-sid 580 | 581 | >>> sleap.system_summary() 582 | GPUs: 1/1 available 583 | Device: /physical_device:GPU:0 584 | Available: True 585 | Initialized: False 586 | Memory growth: None 587 | 588 | >>> import tensorflow as tf 589 | 590 | >>> print(tf.config.list_physical_devices('GPU')) 591 | [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')] 592 | 593 | >>> tf.constant("Hello world!") 594 | 595 | ``` 596 | 597 | If all is as expected, you can exit the Python interpreter, and then exit the GPU node 598 | ```{code-block} pycon 599 | >>> exit() 600 | ``` 601 | ```{code-block} console 602 | $ exit() 603 | ``` 604 | If you encounter troubles with using the SLEAP module, contact 605 | Niko Sirmpilatze of the SWC [Neuroinformatics Unit](https://neuroinformatics.dev/). 606 | 607 | To completely exit the HPC cluster, you will need to type `exit` or 608 | `logout` until you are back to the terminal prompt of your local machine. 609 | See [Set up SSH for the SWC HPC cluster](../programming/SSH-SWC-cluster.md) 610 | for more information. 611 | -------------------------------------------------------------------------------- /docs/source/data_analysis/index.md: -------------------------------------------------------------------------------- 1 | # Data Analysis 2 | 3 | Guides related to the analysis of neuroscientific data, spanning a wide range of data types such as electrophysiology, behaviour, calcium imaging, histology, etc. The focus may be on the use of specific software tools, or on more general analysis tasks and concepts. 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | 8 | ../electrophysiology/index 9 | HPC-module-SLEAP 10 | ``` 11 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/community.md: -------------------------------------------------------------------------------- 1 | # Community 2 | 3 | This site is maintained by the 4 | [NeuroInformatics Unit](https://neuroinformatics.dev/). 5 | Please don't hesitate to contact us 6 | (in particular Joe Ziminski) on Slack or 7 | [Zulip chat](https://neuroinformatics.zulipchat.com/) 8 | with any questions or feedback. 9 | 10 | For information and advice on electrophysiology from the SWC community, the 11 | best place to go is the `#forum-extracellular-ephys` channel on the SWC Slack. 12 | 13 | Outside the SWC, you can address any questions or issues about 14 | [SpikeInterface](https://github.com/SpikeInterface) 15 | by raising an issue on their 16 | [GitHub repository](https://github.com/SpikeInterface/spikeinterface/issues). 17 | They are very friendly and 18 | happy to answer any questions! 19 | 20 | In addition, the 21 | [Neuropixels](https://neuropixelsgroup.slack.com/) 22 | Slack channel is a great resource, with an active community discussing 23 | extracellular electrophysiology acquisition and analysis. 
24 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/example_pipelines/examples/README.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | Python Pipelines 4 | ================ 5 | 6 | Multiple probes (Cambridge Neurotech) 7 | ------------------------------------- 8 | 9 | Dammy Onih 10 | (`Akrami lab `__) 11 | is running a multi-session paradigm with two 12 | `Cambridge Neurotech `__ 13 | probes. In this multi-session task, mice learn a statistical learning paradigm to a 14 | reward-associated auditory stimulus, recording from the hippocampus. 15 | The pipeline uses SpikeInterface for preprocessing, 16 | sorting, and analysis and can be found `here `__. 17 | 18 | The IBL analysis pipeline 19 | ------------------------- 20 | 21 | Nate Miska (`Mrsic-Flogel lab `__) 22 | is a member of the 23 | `International Brain Laboratory 24 | (IBL) `_ 25 | running the 26 | `IBL's standardised behavioural task `_ 27 | with acute Neuropixels 1.0 recordings. Details of the 28 | `analysis pipeline code `__ 29 | on the IBL data management system can be found 30 | `here `_. 31 | 32 | Standalone Scripts 33 | ------------------ 34 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/example_pipelines/examples/notes-on-docs-structure.txt: -------------------------------------------------------------------------------- 1 | Here, the 'Python Examples' page is rendered as a sphinx gallery. The 2 | first section is python examples that link to outside repos. The second 3 | section is the sphinx gallery entries. The 'README.rst' is the 4 | 'Python Examples' page, it must be called 'README.rst' for sphinx-related 5 | reasons. 6 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/example_pipelines/examples/sara_mederos.py: -------------------------------------------------------------------------------- 1 | """ 2 | NP2.0 in SpikeInterface 3 | ======================= 4 | 5 | Sara Mederos 6 | `(Hofer Lab) `__ 7 | performs chronic electrophysiological 8 | recordings from subcortical and cortical areas using 4-shank 9 | Neuropixels 2.0 probes (acquired using 10 | `Open Ephys `__). 11 | Recordings are conducted in freely moving mice during behavioral 12 | paradigms that assess the cognitive control of innate behaviors. 13 | A pipeline used for pre-processing, sorting, and quality metrics 14 | can be found below. 15 | """ 16 | 17 | from spikeinterface import extract_waveforms 18 | from spikeinterface.extractors import read_openephys 19 | from spikeinterface.preprocessing import phase_shift, bandpass_filter, common_reference 20 | from spikeinterface.sorters import run_sorter 21 | from spikeinterface.qualitymetrics import compute_quality_metrics 22 | from pathlib import Path 23 | from probeinterface.plotting import plot_probe 24 | import matplotlib.pyplot as plt 25 | from spikeinterface import curation 26 | from spikeinterface.widgets import plot_timeseries 27 | import numpy as np 28 | 29 | 30 | data_path = Path( 31 | r"/ceph/.../100323/2023-10-03_18-57-09/Record Node 101/experiment1" 32 | ) 33 | output_path = Path( 34 | r"/ceph/.../derivatives/100323/" 35 | ) 36 | 37 | show_probe = False 38 | show_preprocessing = True 39 | 40 | # This reads OpenEphys 'Binary' format. 
It determines the 41 | # probe using probeinterface.read_openephys, which reads `settings.xml` 42 | # and requires the NP_PROBE field is filled. 43 | raw_recording = read_openephys(data_path) 44 | 45 | if show_probe: 46 | probe = raw_recording.get_probe() 47 | plot_probe(probe) 48 | plt.show() 49 | 50 | # Run time shift (multiplex correction) and filter 51 | shifted_recording = phase_shift(raw_recording) 52 | filtered_recording = bandpass_filter(shifted_recording, freq_min=300, freq_max=6000) 53 | 54 | # Perform median average filter by shank 55 | channel_group = filtered_recording.get_property("group") 56 | split_channel_ids = [ 57 | filtered_recording.get_channel_ids()[channel_group == idx] 58 | for idx in np.unique(channel_group) 59 | ] 60 | preprocessed_recording = common_reference( 61 | filtered_recording, reference="global", operator="median", groups=split_channel_ids 62 | ) 63 | 64 | if show_preprocessing: 65 | recs_grouped_by_shank = preprocessed_recording.split_by("group") 66 | for rec in recs_grouped_by_shank: 67 | plot_timeseries( 68 | preprocessed_recording, 69 | order_channel_by_depth=True, 70 | time_range=(3499, 3500), 71 | return_scaled=True, 72 | show_channel_ids=True, 73 | mode="map", 74 | ) 75 | plt.show() 76 | 77 | # Run the sorting 78 | sorting = run_sorter( 79 | "kilosort3", 80 | preprocessed_recording, 81 | singularity_image=True, 82 | output_folder=(output_path / "sorting").as_posix(), 83 | car=False, 84 | freq_min=150, 85 | ) 86 | 87 | # Curate the sorting output and extract waveforms. Calculate 88 | # quality metrics from the waveforms. 89 | sorting = sorting.remove_empty_units() 90 | 91 | sorting = curation.remove_excess_spikes(sorting, preprocessed_recording) 92 | 93 | # The way spikeinterface is set up means that quality metrics are 94 | # calculated on the spikeinterface-preprocessed, NOT the kilosort 95 | # preprocessed (i.e. drift-correct data). 96 | # see https://github.com/SpikeInterface/spikeinterface/pull/1954 for details. 97 | waveforms = extract_waveforms( 98 | preprocessed_recording, 99 | sorting, 100 | folder=(output_path / "postprocessing").as_posix(), 101 | ms_before=2, 102 | ms_after=2, 103 | max_spikes_per_unit=500, 104 | return_scaled=True, 105 | sparse=True, 106 | peak_sign="neg", 107 | method="radius", 108 | radius_um=75, 109 | ) 110 | 111 | quality_metrics = compute_quality_metrics(waveforms) 112 | quality_metrics.to_csv(output_path / "postprocessing") 113 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/example_pipelines/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This page shares extracellular electrophysiology pipelines used by researchers across the SWC. 4 | 5 | These examples aim to demonstrate the possibilities for ephys 6 | pipeline building rather than serve as drop-in tools for 7 | data processing. However, please feel free to 8 | contact any listed researchers to ask questions 9 | or seek advice! 
10 | 11 | ::::{grid} 1 2 2 2 12 | :gutter: 3 13 | 14 | :::{grid-item-card} {fas}`chart-simple;sd-text-primary` Python Examples 15 | :link: auto_examples_ephys/index 16 | :link-type: doc 17 | ::: 18 | 19 | :::{grid-item-card} {fas}`chart-simple;sd-text-primary` MATLAB Examples 20 | :link: matlab_examples 21 | :link-type: doc 22 | ::: 23 | 24 | :::: 25 | 26 | 27 | ```{toctree} 28 | :maxdepth: 2 29 | :hidden: 30 | 31 | auto_examples_ephys/index 32 | matlab_examples 33 | 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/example_pipelines/matlab_examples.rst: -------------------------------------------------------------------------------- 1 | .. _matlab_examples: 2 | 3 | :orphan: 4 | 5 | Matlab Pipelines 6 | ================= 7 | 8 | Multi-probe Neuropixels 1.0 9 | --------------------------- 10 | 11 | Andrei Khilkevich 12 | (`Mrsic-Flogel lab `__) 13 | performs acute recordings in a visual change-detection decision-making task. 14 | Two implanted probes (NP 1.0) in a head-fixed preparation are used to 15 | simultaneously record different regions across the whole brain (using `SpikeGLX `__). 16 | Their pipeline is available 17 | `here `__. 18 | 19 | Automated pipeline for multimodal integration 20 | --------------------------------------------- 21 | 22 | Mateo Velez-Fort 23 | (`Margrie Lab `__) 24 | investigates the integration of visual 25 | and vestibular information in the visual cortex with the 26 | Margrie lab's 'Translocator' setup. They use 27 | Neuropixels 2.0 (acquired using `SpikeGLX `__ 28 | for acute recordings from the primary visual cortex and other cortical areas as the head-fixed 29 | mouse is physically displaced along a track. 30 | The pipeline is available 31 | `here `__. 32 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/index.md: -------------------------------------------------------------------------------- 1 | # Electrophysiology 2 | 3 | This page serves as an information source for researchers using 4 | extracellular electrophysiology at the SWC. It contains 5 | useful [resources](resources.md) for learning, as well as 6 | [example pipelines](example_pipelines/index) 7 | used by researchers in the building. 8 | 9 | We encourage all types of contributions. If you'd like to contribute a 10 | guide or pipeline, please don't hesitate to get in touch! See the 11 | [community](community.md) 12 | page for more details. 13 | 14 | This page is maintained by the [Neuroinformatics Unit](https://neuroinformatics.dev). 15 | We are focused on building community tools to automate 16 | and standardise electrophysiology analysis. 
17 | 18 | 19 | ::::{grid} 1 3 3 3 20 | :gutter: 3 21 | 22 | :::{grid-item-card} {fas}`chart-simple;sd-text-primary` Resources 23 | :link: resources 24 | :link-type: doc 25 | ::: 26 | 27 | :::{grid-item-card} {fas}`chart-simple;sd-text-primary` Example Pipelines 28 | :link: example_pipelines/index 29 | :link-type: doc 30 | ::: 31 | 32 | :::{grid-item-card} {fas}`chart-simple;sd-text-primary` Community 33 | :link: community 34 | :link-type: doc 35 | ::: 36 | 37 | :::: 38 | 39 | ```{toctree} 40 | :maxdepth: 2 41 | :hidden: 42 | 43 | resources 44 | example_pipelines/index 45 | community 46 | ``` 47 | -------------------------------------------------------------------------------- /docs/source/electrophysiology/resources.md: -------------------------------------------------------------------------------- 1 | # Resources 2 | 3 | This section includes articles and videos providing 4 | background and technical detail on extracellular 5 | electrophysiology data preprocessing and analysis. 6 | 7 | This section is by no means exhaustive, please 8 | feel free to [get in contact](community.md) to suggest 9 | additions to this page. 10 | 11 | For pipeline building, we recommend 12 | [SpikeInterface](https://github.com/SpikeInterface/spikeinterface), 13 | an open source community toolkit for extracellular electrophysiology, 14 | for preprocessing and spike sorting. To begin 15 | building your pipeline, the 16 | [SpikeInterface](https://spikeinterface.readthedocs.io/en/stable/) 17 | documentation is a good starting point. 18 | 19 | ## General Introduction 20 | 21 | Below are a selection of papers that give a history 22 | and overview of the extracellular electrophysiology landscape: 23 | 24 | [Steinmetz NA et al. (2018). Challenges and opportunities for large-scale electrophysiology with Neuropixels probes. *Current Opinion in Neurobiology*.](https://pubmed.ncbi.nlm.nih.gov/29444488/) 25 | 26 | [Buccino AP et al. (2022). Spike sorting: new trends and challenges of the era of high-density probes. *Progress in Biomedical Engineering*.](https://iopscience.iop.org/article/10.1088/2516-1091/ac6b96/meta) 27 | 28 | [Rey HG et al. (2015). Past, present and future of spike sorting techniques. *Brain Research Bulletin*.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4674014/) 29 | 30 | [Carlson D et al. (2019). Continuing progress of spike sorting in the era of big data. *Current Opinion in Neurobiology*](https://pubmed.ncbi.nlm.nih.gov/30856552/) 31 | 32 | ## Technical Introduction 33 | 34 | This section includes more technical resources on the different stages of 35 | extracellular electrophysiology analysis. 36 | 37 | A particularly useful resource is the 38 | [Neuropixels](https://www.ucl.ac.uk/neuropixels/courses) course, with their videos published online 39 | (e.g. [2023](https://www.ucl.ac.uk/neuropixels/training/2023-neuropixels-course)). 40 | While these are targeted towards Neuropixels users, they are 41 | valuable resources for any researcher 42 | approaching electrophysiology preprocessing and analysis. 43 | 44 | ### Preprocessing 45 | 46 | The [IBL white paper](https://figshare.com/articles/online_resource/Spike_sorting_pipeline_for_the_International_Brain_Laboratory/19705522) 47 | contains a clearly written overview of common preprocessing steps. Similarly, 48 | [Bill Karsh's guide](https://billkarsh.github.io/SpikeGLX/help/catgt_tshift/catgt_tshift/) on 49 | SpikeGLX preprocessing tools gives a useful overview. 
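To get a concrete feel for what these steps look like in practice, below is a minimal, illustrative SpikeInterface sketch (the recording path is a placeholder, and the choice of steps, phase-shift correction, band-pass filtering and common median referencing, is just one common recipe; see the [example pipelines](example_pipelines/index) for complete, real-world versions):

```python
import spikeinterface.full as si

# Placeholder path: point this at your own SpikeGLX recording folder
recording = si.read_spikeglx("/path/to/spikeglx_recording")

# A common preprocessing chain: correct per-channel sampling offsets,
# band-pass filter to the spike band, then apply a common median reference
recording = si.phase_shift(recording)
recording = si.bandpass_filter(recording, freq_min=300, freq_max=6000)
recording = si.common_reference(recording, reference="global", operator="median")
```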
50 | 
51 | [de Cheveigné & Nelken (2019)](https://pubmed.ncbi.nlm.nih.gov/30998899/)
52 | provide a technical treatment of digital filtering, a key step in preprocessing and analysis.
53 | 
54 | ### Spike Sorting
55 | 
56 | [This video on Spike Sorting with Christophe Pouzat](https://www.youtube.com/watch?v=vSydfDvsewY)
57 | provides an excellent overview of the spike-sorting problem.
58 | 
59 | This article provides a more detailed introduction to spike sorting and associated quality metrics:
60 | 
61 | [Hill DN et al. (2007). Spike Sorting. In *Observed Brain Dynamics by P. P. Mitra and H. Bokil*.](https://neurophysics.ucsd.edu/publications/obd_ch3_2.pdf)
62 | 
63 | It is also recommended to check out the
64 | papers of existing spike sorting algorithms. A list of the main
65 | spike sorters can be found
66 | [on the SpikeInterface website](https://spikeinterface.readthedocs.io/en/latest/modules/sorters.html#supported-spike-sorters).
67 | 
68 | 
69 | ### Quality Metrics and Manual Curation
70 | 
71 | Assessing the quality of spike-sorting is key to producing high-quality data.
72 | 
73 | These two papers provide a nice introduction to quality metrics for assessing
74 | spike sorting outputs:
75 | 
76 | [Hill DN et al. (2011). Quality Metrics to Accompany Spike Sorting of Extracellular Signals. *Journal of Neuroscience*.](https://www.jneurosci.org/content/31/24/8699)
77 | 
78 | [Harris KD et al. (2016). Improving data quality in neuronal population recordings. *Nature Neuroscience*.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5244825/)
79 | 
80 | [Phy](https://github.com/cortex-lab/phy)
81 | is the most popular tool for performing manual curation of spike sorting results.
82 | A great [guide by Steve Lenzi](https://phy.readthedocs.io/en/latest/sorting_user_guide/) takes you
83 | through the key steps for manual curation.
84 | 
85 | More recently, advances in automating curation have been made in the
86 | [Bombcell package](https://github.com/Julie-Fabre/bombcell).
87 | 
88 | SpikeInterface also maintains a set of quality metrics,
89 | [explained in detail](https://spikeinterface.readthedocs.io/en/latest/modules/qualitymetrics.html)
90 | in their documentation.
91 | 
92 | ## SpikeInterface
93 | 
94 | Visit the SpikeInterface
95 | [GitHub](https://github.com/SpikeInterface/spikeinterface)
96 | and
97 | [Documentation](https://spikeinterface.readthedocs.io/en/stable/)
98 | to get started.
99 | 
100 | ## Other Community Tools
101 | 
102 | ### Analysis
103 | 
104 | SpikeInterface
105 | is mainly focused on preprocessing, spike sorting and quality metrics.
106 | [Pynapple](https://github.com/pynapple-org/pynapple),
107 | [Elephant](https://neuralensemble.org/elephant/),
108 | and [Nemos](https://github.com/flatironinstitute/nemos)
109 | all provide useful toolboxes for analysing data post-sorting.
110 | 
111 | The [SpikeForest](https://spikeforest.flatironinstitute.org/)
112 | project is an excellent resource for assessing the performance of
113 | different spike-sorting algorithms across probe types and brain regions.
114 | 
115 | ### Pipelines
116 | 
117 | [The Allen spike-sorting pipeline](https://github.com/AllenInstitute/ecephys_spike_sorting)
118 | 
119 | [The IBL sorting pipeline](https://github.com/int-brain-lab/ibl-neuropixel)
120 | 
121 | For working with Neuropixels data, see the [Neuropixels Utils](https://djoshea.github.io/neuropixel-utils/) package
122 | (MATLAB) and [NeuroPyxels](https://github.com/m-beau/NeuroPyxels) (Python).
123 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # Neuroinformatics Unit HowTo 2 | 3 | On this website you can find long-form how-to guides as well as collections of small tips and tricks related to data analysis and software development in neuroscience. 4 | 5 | The website is maintained by members of the [Neuroinformatics Unit](https://neuroinformatics.dev). The information is primarily aimed at researchers at the [Sainsbury Wellcome Centre (SWC)](https://www.sainsburywellcome.org/web/) and [Gatsby Computational Neuroscience Unit (GCNU)](https://www.ucl.ac.uk/gatsby/gatsby-computational-neuroscience-unit), though most of it should be useful to neuroscientists in general. 6 | 7 | 8 | ## Main sections 9 | The guides are organised into three main sections: 10 | 11 | ::::{grid} 1 2 2 3 12 | :gutter: 3 13 | 14 | :::{grid-item-card} {fas}`chart-simple;sd-text-primary` Data Analysis 15 | :link: data_analysis/index 16 | :link-type: doc 17 | 18 | Guides related to tools and approaches for analysing neuroscientific data. 19 | ::: 20 | 21 | :::{grid-item-card} {fas}`code;sd-text-primary` Programming 22 | :link: programming/index 23 | :link-type: doc 24 | 25 | General coding and software development issues. 26 | ::: 27 | 28 | :::{grid-item-card} {fas}`lock-open;sd-text-primary` Open Science 29 | :link: open_science/index 30 | :link-type: doc 31 | 32 | How to effectively share your code and data. 33 | ::: 34 | 35 | :::: 36 | 37 | 38 | ```{toctree} 39 | :maxdepth: 2 40 | :hidden: 41 | 42 | data_analysis/index 43 | programming/index 44 | open_science/index 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/source/open_science/Data-sharing.md: -------------------------------------------------------------------------------- 1 | # Data Sharing 2 | 3 | ## Background 4 | 5 | Data is key to neuroscience, and it should be shared for maximal utility. There is increasing pressure from funders and publishers to share data alongside publications: 6 | 7 | **Wellcome:** 8 | 9 | > “We expect our researchers to maximise the availability of research data, software and materials with as few restrictions as possible. As a minimum, the data underpinning research papers should be made available to other researchers at the time of publication, as well as any original software that is required to view datasets or to replicate analyses.” 10 | 11 | **Cell Press:** 12 | 13 | > “All datasets that are composed of standardized datatypes and reported in Cell Press papers must be deposited in a datatype-specific, Cell-Press-recommended repository before a paper is accepted for publication.” 14 | 15 | > “All original code must be either deposited in a repository that mints DOIs or included in the supplemental information before a paper is accepted for publication.” 16 | 17 | 18 | ## Requirements 19 | 20 | Currently, SWC does not have any specific code and data sharing requirements, but your funder or publisher may do. 21 | 22 | **Ideally, all data and code will be shared in such a way that any other researcher could easily reproduce key figures from a publication, and build on those data and software in their own research.** 23 | 24 | 25 | ## Code 26 | Original code can be hosted in a public GitHub/GitLab repository, hosted on an organisational account (i.e. not an individual account). 
This could be the [SWC organisation](https://github.com/SainsburyWellcomeCentre), or that of a particular team, lab or collaboration. To enable reuse, code should include an [appropriate license](licensing-target).
27 | 
28 | To ensure a permanent archive of the code associated with a publication, a [DOI](https://www.doi.org/) should be created (e.g. using [Zenodo](https://docs.github.com/en/repositories/archiving-a-github-repository/referencing-and-citing-content)).
29 | 
30 | ## Data
31 | ### Repository
32 | There are [many repositories](repo-table) that will accept the types of data generated by SWC. They fall into three categories: general, neuroscience-specific and modality-specific. If you do not have a preference, we recommend:
33 | 
34 | **For small (<20GB) datasets:**
35 | Use the existing [SWC section of the UCL FigShare instance](https://rdr.ucl.ac.uk/The_Sainsbury_Wellcome_Centre). This is easy to use, and is supported centrally by UCL.
36 | 
37 | **For larger datasets:**
38 | Create a GIN repository, ideally within the [SWC organisation](https://gin.g-node.org/SainsburyWellcomeCentre). GIN uses Git and provides a versioning system, making it particularly well suited for ongoing research projects.
39 | 
40 | **Specific modalities:**
41 | In some cases, there may be a modality or field-specific repository that would be best suited for the data (or mandated by the publisher); some examples include:
42 | - [Brain Image Library](http://www.brainimagelibrary.org/) - Large brain image datasets
43 | - [DANDI](https://dandiarchive.org/) - Electrophysiology, optophysiology, behavioural time-series (must be in NWB format)
44 | - [BioImage Archive](https://www.ebi.ac.uk/bioimage-archive/) - Biological images
45 | 
46 | In all cases, using a repository that can create a [DOI](https://www.doi.org/) will help increase citations.
47 | 
48 | ### Structure
49 | There is no single best way to structure your data, but it must be in a form that allows users to easily reuse it for their own purposes. This may be by adopting specific file formats (such as [NeuroData Without Borders](https://www.nwb.org/)). Alternatively, the data could be organised within a standardised project folder structure. Many labs have their own system, but those starting from scratch may wish to use the [NeuroBlueprint data structure](https://neuroblueprint.neuroinformatics.dev/).
50 | 
51 | **However the data is structured, all relevant metadata must be included with the raw data.**
52 | 
53 | ## Assistance
54 | If you have any technical questions about sharing data or code, please contact adam.tyson@ucl.ac.uk.
55 | 56 | (repo-table)= 57 | ## Reference of potential repositories for neuroscience data 58 | | Repository | URL | Domain | 59 | | ---------------------------------- |-----------------------------------------------------| ----------------------------------------------------------------------------------------------------- | 60 | | Globus | | Anything | 61 | | figshare | https://rdr.ucl.ac.uk/The_Sainsbury_Wellcome_Centre | Anything | 62 | | Zenodo | https://zenodo.org/ | Anything | 63 | | Dryad | https://datadryad.org | Anything | 64 | | GIN (German Neuroinformatics Node) | https://gin.g-node.org/ | Neuroscience | 65 | | EBRAINS | https://ebrains.eu/service/share-data | Neuroscience | 66 | | Open Source Brain | https://www.v2.opensourcebrain.org/ | Any neuroscience data (data must be hosted elsewhere) | 67 | | Brain Image Library | http://www.brainimagelibrary.org/ | Large brain image datasets | 68 | | Image data resource | https://idr.openmicroscopy.org/ | Reference image datasets | 69 | | BioImage Archive | https://www.ebi.ac.uk/bioimage-archive/ | Biological images | 70 | | DANDI | https://dandiarchive.org/ | Electrophysiology, optophysiology, behavioural time-series and images from immunostaining experiments | 71 | | NeMO | https://nemoarchive.org/ | Omic data from the BRAIN Initiative (& others) | 72 | | Openneuro | https://openneuro.org/ | BIDS-compliant MRI, PET, EEG etc | 73 | | CRCNS | https://crcns.org/ | Computational neuroscience | 74 | | BrainGlobe | https://gin.g-node.org/BrainGlobe/atlases | Brain Atlases | 75 | | NeuroMorpho | https://neuromorpho.org/ | Neuronal morphologies | 76 | | Cell Image Library | http://ccdb.ucsd.edu/home | Cell images | 77 | | ModelDB | https://modeldb.science/ | Computational neuroscience models | 78 | -------------------------------------------------------------------------------- /docs/source/open_science/GIN-repositories.md: -------------------------------------------------------------------------------- 1 | (target-create-gin-repo)= 2 | # Create a GIN repository for your dataset 3 | 4 | [GIN](https://gin.g-node.org/G-Node/Info/wiki) (hosted by the German Neuroinformatics Node) is a free and open data management system designed for neuroscientific data. 5 | 6 | It is web-accessible, based on [`git`](https://git-scm.com/) and [`git-annex`](https://git-annex.branchable.com/), and allows you to keep your data in sync, backed up and easily accessible. 7 | 8 | Below we explain the main user workflows in GIN. 9 | 10 | 11 | ## Preparatory steps - do only once 12 | 13 | We need to do these steps only the first time we use GIN's command-line interface (CLI) on our machine. 14 | 15 | 1. Create [a GIN account](https://gin.g-node.org/user/sign_up). 16 | 2. [Download GIN CLI](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Setup#setup-gin-client) and set it up by running: 17 | ``` 18 | $ gin login 19 | ``` 20 | You will be prompted for your GIN username and password. 21 | 3. Confirm that everything is working properly by typing: 22 | ``` 23 | $ gin --version 24 | ``` 25 | 26 | ## Create a GIN repository 27 | 28 | To create a local and a remote GIN repository, follow these steps: 29 | 30 | 1. **Log in to the GIN server** 31 | 32 | Before running any `gin` commands, make sure you are logged in to your account by running: 33 | 34 | ``` 35 | $ gin login 36 | ``` 37 | 38 | :::{tip} 39 | In Unix-like systems (Ubuntu, MacOS), you may need `sudo` permissions for some of the following `gin` commands. If so, remember to prepend all commands with `sudo`. 
40 | :::
41 | 
42 | 2. **Initialise a GIN repository**
43 | 
44 | ::::{tab-set}
45 | 
46 | :::{tab-item} In a new directory
47 | 
48 | - Create a new GIN repository locally and on the GIN server:
49 | 
50 | ```
51 | $ gin create <repository-name>
52 | ```
53 | This will create a repository called `<repository-name>` on the GIN server under your user account, and a directory with the same name in the current working directory.
54 | 
55 | OR alternatively:
56 | 
57 | Create a repository in the GIN server [from the browser](https://gin.g-node.org/repo/create), and clone (retrieve) it to your local workspace:
58 | 
59 | ```
60 | $ gin get <user>/<repository-name>
61 | ```
62 | 
63 | 
64 | 
65 | - Next, move or copy files to the newly created directory to add data to the local GIN repository.
66 | :::
67 | 
68 | :::{tab-item} In an existing directory
69 | - Move to the relevant directory using `cd`.
70 | 
71 | - Create a new repository on the GIN server and locally in the current working directory:
72 | 
73 | ```
74 | $ gin create --here <repository-name>
75 | ```
76 | This will create a repository named `<repository-name>` on the GIN server under your user account and link it to the current working directory.
77 | 
78 | OR alternatively:
79 | 
80 | - Initialise the current working directory as a GIN repository:
81 | 
82 | ```
83 | $ gin init
84 | ```
85 | 
86 | - Add a remote:
87 | 
88 | ```
89 | $ gin add-remote <name> <location>
90 | ```
91 | 
92 | where `<name>` is the name you want to give to the remote (e.g. `origin`) and `<location>` is the location of the data store, which should be in the form of alias:path or server:path (e.g. `gin:<user>/<repository-name>`).
93 | 
94 | - If the remote GIN repository doesn't exist, you will be prompted to either create it, simply add the remote address, or abort.
95 | 
96 | :::
97 | ::::
98 | 
99 | 
100 | :::{note}
101 | Initialising the GIN local repository (with `gin create` or `gin init`) will create a hidden `.git` directory under the local repository directory. The local repository excluding this `.git` folder is what we will later call the _working directory_.
102 | :::
103 | 
104 | 
105 | 3. **Add files to the GIN remote repository**
106 | 
107 | It is good practice to keep a record of the changes in the repository through commit messages. To keep a useful and clean commit history, it is also recommended to make small commits by selecting a subset of the files.
108 | 
109 | - To add a record of the current state of a local repository, run:
110 | 
111 | ```
112 | $ gin commit --message <message> <filenames>
113 | ```
114 | 
115 | You can replace the `<filenames>` above by an expression with a wildcard (e.g., `*.png` to include all png files). It can also be a list of files separated by whitespaces. A `<filenames>` equal to `.` will include all files with changes. See the full syntax for `gin commit` [here](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Help#record-changes-in-local-repository).
116 | 
117 | - To upload all local changes to the remote GIN repository, run:
118 | 
119 | ```
120 | $ gin upload <filenames>
121 | ```
122 | 
123 | `<filenames>` accepts the same inputs as in `gin commit`. You can run an upload command after a few commits (so not necessarily after every commit).
124 | 
125 | You can use the flag `--to <name>` to upload the changes to a specific remote. To show the remotes accessible to your GIN account, run `gin remotes`. See the full syntax for `gin upload` [here](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Help#upload-local-changes-to-a-remote-repository).
126 | 
127 | If the set of files in the `gin upload` command includes files that have been changed locally but have not been committed, they will be automatically committed when uploading.
128 | 
129 | After running `gin upload`, the data will be uploaded to the GIN server and it will be possible to retrieve it later from there. However, note that the upload command sends all changes made in the directory to the GIN server, including deletions, renames, etc. Therefore, if you delete files from the directory on your computer and perform a `gin upload`, the files will be removed from the server as well. Such changes can be synchronized by simply running `gin upload` (i.e., without specifying any files). See further details in the [GIN docs](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Usage+Tutorial#basic-workflow-only-using-gin).
130 | 
131 | 4. **Consider whether to lock the data**
132 | 
133 | You may want to lock the data to save space locally or to prevent editing in the future — see the section on [File locking](#file-locking) for further details.
134 | 
135 | :::{tip}
136 | - Use `gin ls` to check on the current status of the GIN repository (if you are familiar with `git`, it is somewhat equivalent to `git status`). The file status acronyms used in the output are described [here](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Help#list-the-sync-status-of-files-in-the-local-repository).
137 | - Use `gin sync` to sync the changes bi-directionally between the local and the remote GIN repository.
138 | - If the output from `gin ls` doesn't look right (e.g., files already uploaded to the GIN server appear under `Locally modified (unsaved)`), try running `gin sync` and check the status again.
139 | - To logout from the GIN CLI session in the terminal, run `gin logout`.
140 | :::
141 | 
142 | 
143 | 
144 | ### Is this repository public or private?
145 | 
146 | By default, all newly-created GIN repos are private.
147 | 
148 | To make a GIN repository public:
149 | 
150 | 1. Go to the homepage of the remote repository. You can see the URLs for the repositories you have access to by running `gin repos --all`.
151 | 1. Click on _Settings_ (top right).
152 | 1. Unselect the _Private_ checkbox under the _Basic settings_ section.
153 | 
154 | ```{image} ../_static/gin-privacy-settings.png
155 | :class: bg-primary
156 | :width: 600px
157 | :align: center
158 | ```
159 | 
160 | 
161 | ## Download a GIN dataset
162 | 
163 | To download a dataset from the GIN server to a local machine, follow these steps:
164 | 
165 | ::::{tab-set}
166 | 
167 | :::{tab-item} If the local repository doesn't exist
168 | 
169 | 1. If the repository does not exist locally, clone it from the GIN remote server:
170 | 
171 | ```
172 | $ gin get <user>/<repository-name>
173 | ```
174 | 
175 | This command will clone the repository to the current working directory, and download the large files in your dataset as lightweight placeholders.
176 | 
177 | 2. To download the content of the placeholder files, run:
178 | ```
179 | $ gin download --content
180 | ```
181 | If the large files in the dataset are **_locked_**, this command will download the content to the git annex subdirectory, and turn the placeholder files in the working directory into symlinks that point to the content.
182 | 
183 | If the files are **_unlocked_**, this command will replace the placeholder files in the working directory with the full-content files and **also** download the content to the git annex locally.
184 | 
185 | See the section on [File locking](#file-locking) for further details.
186 | :::
187 | 
188 | :::{tab-item} If the local repository exists
189 | 
190 | 1. If the repository already exists locally, we only need to download any changes from the remote. To do this, run from the GIN local repository:
191 | 
192 | ```
193 | $ gin download
194 | ```
195 | 
196 | This command will create new files that were added remotely, delete files that were removed, and update files that were changed. By default, new files are added as empty placeholders.
197 | 
198 | To retrieve the content of all files in the repository, run the download command with the optional `--content` flag. See the [GIN docs](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Help#download-all-new-information-from-a-remote-repository) for further details.
199 | :::
200 | ::::
201 | 
202 | :::{tip}
203 | The content of individual files can be retrieved using:
204 | ```
205 | $ gin get-content <filenames>
206 | ```
207 | and removed with:
208 | ```
209 | $ gin remove-content <filenames>
210 | ```
211 | :::
212 | 
213 | ## Update a GIN dataset
214 | 
215 | To update a dataset hosted in GIN:
216 | 1. First clone the repository locally by running:
217 | 
218 | ```
219 | $ gin get <user>/<repository-name>
220 | ```
221 | 
222 | To see the `<user>/<repository-name>` paths accessible from your GIN account, run `gin repos --all`.
223 | 
224 | 2. Copy or move the required files to the local repository and log the changes with a commit:
225 | 
226 | ```
227 | $ gin commit -m <message> <filenames>
228 | ```
229 | 
230 | 3. Upload the committed changes to the GIN server:
231 | ```
232 | $ gin upload
233 | ```
234 | 
235 | :::{tip}
236 | - To [unannex a file](https://gin.g-node.org/G-Node/Info/wiki/FAQ+Troubleshooting#how-to-unannex-files), that is, to remove a file from the GIN tracking before uploading:
237 | ```
238 | $ gin git annex unannex [path/filename]
239 | ```
240 | 
241 | - To stop tracking an existing directory as a GIN repository, delete the `.git` directory.
242 | - If the files in the directory we want to stop tracking are locked, remember to unlock them before deleting the `.git` directory! Otherwise we may not be able to delete the `.git/annex` content.
243 | :::
244 | 
245 | 
246 | 
247 | ## File locking
248 | [File locking](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Usage+Tutorial#file-locking) is an important part of GIN repositories. Below are the main ideas behind this.
249 | 
250 | Files in a GIN repository can be **_locked_** or **_unlocked_**. The lock state relates to the nature of the placeholder files we get in the working directory when we clone the remote repository via `gin get <user>/<repository-name>`:
251 | 
252 | - **on Unix-like systems** (MacOS, Ubuntu):
253 | - if a file is **_locked_**, its corresponding placeholder file will be a **_symlink_**. These symlinks point to the annexed content (under `.git/annex/objects`). We can open the files in the working directory (using the symlinks) but we can't modify them.
254 | - If a file is **_unlocked_**, the placeholder file in the working directory is an **_ASCII text file_** with a path. The path is approximately where the content of the file will be downloaded to when we request it.
255 | - **on Windows**:
256 | - if a file is **_locked_**, the placeholder file is a **_plain text file_** with a path pointing to the content in the git annex (but see caution below!).
257 | - If a file is **_unlocked_**, the behaviour is the same as in Unix-like systems.
258 | 
259 | The lock state of a file is **_persistent_**. This means that if we clone a GIN (remote) repository whose files are unlocked, we lock them in the local copy, and then upload the local repository to the GIN server, the next time someone clones the GIN repository the files they fetch will be locked.
260 | 
261 | Unlocked files can be edited. If the data is unlocked and the full content of the dataset is downloaded locally, the file in the working directory has content, and so does its copy under git annex.
262 | 
263 | :::{caution}
264 | Note that if we download the contents of unlocked files locally, the disk usage of the files checked into the repo doubles, because the content exists both in the working directory and under the git annex. But in exchange users can modify and revert files to previous commits.
265 | :::
266 | 
267 | :::{caution}
268 | We have observed that it is possible to unintentionally overwrite locked files on Windows. Please be careful and double-check the output of `gin ls` before uploading. You may also want to read [about `git-annex` on Windows](https://gin.g-node.org/G-Node/Info/wiki/Some+Notes+On+Git+Annex) if you are considering using `git-annex` directly.
269 | :::
270 | 
271 | Locked files cannot be edited. For example, if we open a locked image with Preview in MacOS and try to edit it, we will be asked if we wish to unlock the file. However, even if we do unlock it, we won't be able to save any changes because we don't have writing permissions.
272 | 
273 | Files need to be committed before locking. We can switch the locking state for one or more files with:
274 | ```
275 | $ gin lock <filenames>
276 | ```
277 | and
278 | ```
279 | $ gin unlock <filenames>
280 | ```
281 | After changing the locking state, remember to record the new state with a `gin commit`!
282 | 
283 | **Recommendations from the GIN docs on when to lock / unlock data:**
284 | - Keep files **_unlocked_** if the workflow requires editing large files and keeping snapshots of the progress. But keep in mind this will increase storage use with every commit of a file.
285 | - Keep files **_locked_** if the repository's main goal is long-term storage as an archive, if files are only to be read, and if the filesystem supports symlinks. This will save the extra storage of keeping two copies of the same file.
286 | 
287 | ## Download a GIN dataset with Python
288 | 
289 | We recommend [pooch](https://www.fatiando.org/pooch/latest/index.html) to programmatically download a dataset from a GIN repository's URL. `pooch` is easy to use and has some nice
290 | functionalities like caching the downloaded data, verifying cryptographic hashes or unzipping files upon download.
291 | 
292 | Here is a simple example of how to download a dataset from a GIN repository using `pooch`:
293 | 
294 | ```python
295 | import pooch
296 | 
297 | filepath = pooch.retrieve(
298 |     url="https://gin.g-node.org/<user>/<repository-name>/raw/main/<path-to-file>",
299 |     known_hash=None,
300 |     path="/home/<user>/downloads",  # this is where the file will be saved
301 |     progressbar=True,
302 | )
303 | ```
304 | 
305 | :::{tip}
306 | A common mistake is to use the URL of the GIN repository's webpage instead of the URL of the raw file.
307 | 
308 | The URL of the raw file will have `/raw/` in its path. For example, the URL of the raw file `image.png` in the repository `my-repo` under the user `my-user` should be `https://gin.g-node.org/my-user/my-repo/raw/main/image.png`.
309 | 
310 | The easiest way to get the raw URL of a file is to:
311 | 1. Navigate to the specific file in the GIN repository,
312 | 2. Copy the URL at the search bar of the browser, and
313 | 3. Replace `src` by `raw` in the copied URL.
314 | 
315 | With videos and images, you can often get the raw URL of a file directly by right-clicking on the file in the GIN repository and opening it in a new tab. The URL in the new tab should be the raw URL.
316 | :::
317 | 
318 | 
319 | 
320 | ## Some under-the-hood details
321 | 
322 | GIN is a wrapper around [git-annex](https://git-annex.branchable.com/). The high-level idea behind git-annex is:
323 | - `git` is designed to track small text files, and doesn't cope well with large binary files.
324 | - `git-annex` bypasses this by using git only to track the names and metadata (hashes) of these large binary files, but not their content.
325 | 
326 | The content of these large binary files is only retrieved on demand.
327 | 
328 | Indeed, when we `gin download` a repository from the GIN server, we get a local "copy" (clone) of the dataset in our machine, but this is not strictly a copy. This is because the large binary files that make up this dataset will only be downloaded as placeholders.
329 | 
330 | :::{dropdown} How? Case for an unlocked dataset
331 | 
332 | - If the dataset is unlocked, these placeholder files have the same filenames (and paths) as the corresponding original files, but are instead simply ASCII text files. If we open these placeholder files, we see they contain a path. This path is where the actual content of the corresponding file will be downloaded to, when we request it.
333 | 334 | - For example, if the placeholder ASCII text file with name `image.png` points to this path: 335 | ``` 336 | /annex/objects/MD5-s15081575--f0a21c00672ab7ed0733951a652d4b49 337 | ``` 338 | And when we specifically request for this file's content with: 339 | ``` 340 | gin get-content image.png 341 | ``` 342 | the actual png file is downloaded to: 343 | ``` 344 | .git/annex/objects/Xq/7G/MD5-s15081575--f0a21c00672ab7ed0733951a652d4b49/MD5-s15081575--f0a21c00672ab7ed0733951a652d4b49 345 | ``` 346 | Notice that the path in the ASCII file and the actual path are somewhat different (the actual path contains some additional directories under `objects`). 347 | 348 | We can actually verify this file is the actual image by opening it with an image viewer (e.g. Preview in MacOS): 349 | ``` 350 | open -a Preview .git/annex/objects/Xq/7G/MD5-s15081575--f0a21c00672ab7ed0733951a652d4b49/MD5-s15081575--f0a21c00672ab7ed0733951a652d4b49 351 | ``` 352 | ::: 353 | 354 | 355 | :::{dropdown} How? Case for a locked dataset 356 | - If the dataset is locked and no content has been downloaded, the symlinks in the working directory will be broken (since there is no data in the git annex to retrieve). 357 | - To get the actual content in the git annex, we need to run `gin download --content`. This will fetch the content from the GIN server. After this, the symlinks in the working directory should work. 358 | ::: 359 | 360 | :::{dropdown} How? Case for a new (or updated) local repository 361 | - If we want to create (or update) a GIN repository, we would initialise (or clone) it locally, add files and commit the changes. 362 | - When committing, the data is "copied" from the working directory to the git annex. You can verify this by checking the size of the `.git` folder before and after running `git commit`. 363 | - When we lock the data with `gin lock `, the files in the working directory are replaced with symlinks to the git annex content. 364 | - If after locking the data we commit the state change and upload the changes to the GIN server., the files will stay locked for any future retrievals of the repository. 365 | ::: 366 | 367 | 368 | 387 | 388 | ## Useful GIN resources 389 | 390 | - [GIN CLI usage tutorial](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Usage+Tutorial): includes a description of common workflows. 391 | - [GIN CLI recipes](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Recipes). 392 | - [GIN CLI commands cheatsheet](https://gin.g-node.org/G-Node/Info/wiki/GIN+CLI+Help). 393 | - [GIN troubleshooting](https://gin.g-node.org/G-Node/Info/wiki/FAQ%20Troubleshooting). 
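
Putting the pieces above together, a minimal sketch of an end-to-end GIN CLI session might look like the following. The repository name, description, commit messages and file name are hypothetical placeholders; the CLI tutorial linked above remains the authoritative reference for exact command syntax.

```
# create a repository on the GIN server and clone it locally
gin create my-dataset "Example dataset for the locking workflow"
cd my-dataset

# add a (large) file, record it and push it to the GIN server
cp ~/data/image.png .
gin commit --message "add image" image.png
gin upload image.png

# lock the file for archival use, then record and upload the state change
gin lock image.png
gin commit --message "lock image" image.png
gin upload .
```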
394 | 395 | 396 | ## References 397 | 398 | - https://movement.neuroinformatics.dev/community/contributing.html#adding-new-data 399 | - https://gin.g-node.org/G-Node/info/wiki#how-do-i-start 400 | - https://gin-howto.readthedocs.io/en/latest/gin-repositories.html 401 | - On GIN and its relation to `git-annex` (very high-level): https://gin.g-node.org/G-Node/Info/wiki/GIN+Advantages+Structure 402 | -------------------------------------------------------------------------------- /docs/source/open_science/Licensing.md: -------------------------------------------------------------------------------- 1 | (licensing-target)= 2 | # Software licensing 3 | 4 | ## Disclaimer 5 | **This document was not written by lawyers, and does not represent legal advice.** 6 | 7 | This guide was written by the [Neuroinformatics Unit](https://neuroinformatics.dev/) to help researchers at the [Sainsbury Wellcome Centre (SWC)](https://www.sainsburywellcome.org/web/) and [Gatsby Computational Neuroscience Unit (GCNU)](https://www.ucl.ac.uk/gatsby/gatsby-computational-neuroscience-unit) at [University College London (UCL)](https://www.ucl.ac.uk/) choose a license for their software. 8 | It does not represent official guidance by SWC, GCNU or UCL. 9 | In particular this is not advice about intellectual property, but rather the practicalities of adding a license to your software. 10 | For more information, SWC/GCNU researchers should consult the SWC/GCNU and UCL documentation about intellectual property or contact [UCL Business](https://www.uclb.com/) if you think your software may have commercial potential (before release). 11 | 12 | ## Summary 13 | **Unless your software has commercial potential or depends upon other software with a restrictive license, we recommend you use [The 3-Clause BSD License (BSD-3-Clause)](https://opensource.org/licenses/BSD-3-Clause).** 14 | 15 | ## Why license your software? 16 | There are many reasons to release your software openly such as a funder/publisher requirement, to collaborate with others or simply in the spirit of open science. However, if your software is not licensed then nobody outside your employer’s organisation can (legally) use it. 17 | 18 | To allow others to use your software, you must explicitly choose a license to detail what they can, and cannot do with your software. Typically, this license is included with your software in a file, such as `LICENSE.txt`. 19 | 20 | ## Choosing a license 21 | Intellectual property (IP) law is complex, and the open source software (OSS) license selection reflects this. However, there are many useful resources to help you pick a license such as [choosealicense.com](https://choosealicense.com/) and [tldrlegal.com](https://tldrlegal.com/). 22 | 23 | It is often tempting to write your own license, perhaps only allowing academic use. Unless you are an IP lawyer, please use one of the existing, well-established licenses. This way others can understand what they can use your software for, without seeking their own legal advice. 24 | 25 | ## To choose a license: 26 | 1. Ensure you have permission to license the software yourself. In most cases this decision will be up to your research group leader or department head. 27 | 2. Obtain permission from any other contributors. If any part of the software was developed at a different institute (either by yourself, or a collaborator) ensure you have written permission. Ensure you choose a license in accordance with the requirements of any other institution and their funders. 
Adding or changing a license when external collaborators are involved is tricky. This can be avoided by choosing a license before commencing work. 28 | 3. If you think your software has commercial potential contact your institutional IP or tech transfer team (e.g. [UCL Business](https://www.uclb.com/)) before releasing your code. 29 | 4. Ensure you follow the requirements of the funders and the institutes in which you work. For SWC/GCNU researchers, at the time of writing, UCL, Gatsby and Wellcome do not have a formal OSS licensing policy. 30 | 5. Check the licenses of any other software on which yours depends and ensure you release your software in accordance with these. 31 | 6. Consider the licenses used by other software in the field. It may be easier to contribute to a larger ecosystem of tools if the licenses are compatible. 32 | 7. If you have specific license requirements, consult a guide such as [choosealicense.com](https://choosealicense.com/). **Otherwise …** 33 | 8. **If you want a simple license that allows for virtually unrestricted use of your software, but with a liability disclaimer, use [The 3-Clause BSD License (BSD-3-Clause)](https://opensource.org/licenses/BSD-3-Clause).** 34 | 35 | ## Adding a license 36 | In most cases, adding a license is as simple as copying the license text into a file named `LICENSE.txt` or similar and including this with your software. If you have a GitHub repository, adding a new file named `LICENSE.txt` from the web interface will produce a button “Choose a license template” which will allow you to choose from a list. 37 | 38 | ## Frequently asked questions 39 | 40 | ### Can I change the license later? 41 | You can, but the earlier versions will still be licensed under the original terms. This is why it’s important to carefully consider the license from the outset. 42 | 43 | ### How do I know when my code is ready to release? 44 | If you’ve followed the above steps, then it’s ready to release. There will always be bugs, and the best way to find these are to allow others to read your code and use your software. 45 | 46 | ### If my code depends on another library with a more restrictive license (e.g. GPL), does this affect how I license my software? 47 | 48 | If it is installed separately, and you simply call the library (e.g. using a Python package) then you can choose a license for your software as normal. If you bundle the third party code with your library or link against it, this may affect the license you can use. In these cases, check the terms of the specific license as the GPL family (e.g. GPL, LGPL, AGPL) may vary. 49 | 50 | ### I have included code from others within my repository (e.g. a DL model), how does this affect licensing? 51 | Firstly, you should ensure that the license for your code is compatible with the included code (e.g. by using a [license compatibility checking tool](https://joinup.ec.europa.eu/collection/eupl/solution/joinup-licensing-assistant/jla-compatibility-checker)). You should also adhere to any terms of the third party license and it is good practice to always include a copy of this license with the third party code. 52 | 53 | ## Acknowledgements 54 | This document was adapted from the excellent [Imperial OSS license document](https://www.imperial.ac.uk/research-and-innovation/support-for-staff/scholarly-communication/research-data-management/sharing-data/research-software/). 
55 | 
--------------------------------------------------------------------------------
/docs/source/open_science/index.md:
--------------------------------------------------------------------------------
1 | # Open Science
2 | 
3 | A collection of long-form guides on topics related to open science.
4 | 
5 | ```{toctree}
6 | :maxdepth: 1
7 | 
8 | Licensing
9 | Data-sharing
10 | GIN-repositories
11 | ```
12 | 
--------------------------------------------------------------------------------
/docs/source/programming/Cookiecutter-cruft.md:
--------------------------------------------------------------------------------
1 | # Update cookiecutter-generated files with cruft
2 | 
3 | Ref: [original docs](https://cruft.github.io/cruft/) and [github repo](https://github.com/cruft/cruft).
4 | 
5 | ## Setup: `cruft link`
6 | Install: `pip install cruft`.
7 | 
8 | If this is your first time using `cruft`, you have to set it up using the `cruft link` command, specifying the URL of your cookiecutter on GitHub:
9 | 
10 | ```bash
11 | cruft link https://github.com/neuroinformatics-unit/python-cookiecutter
12 | ```
13 | At this point, cruft will ask you the same questions you were asked when you first generated the repo with cookiecutter. Enter the same answers you did before, but make sure to enter "provide later" when it prompts for "github_repository_url".
14 | 
15 | :::{dropdown} Why 🤷🏻‍♀️?
16 | I have found that if you enter the correct link to your package's repo in the `github_repository_url` field, the command `cruft diff` fails 🤷🏻‍♀️
17 | :::
18 | 
19 | After you have answered all cookiecutter's questions, `cruft` will prompt you to specify the commit hash, i.e. the older version of cookiecutter you had used to generate the repo. For now, we have to do this manually by searching the history. Example commit hash: `9daec12bffbc32da6a252605c5e67c90028fefc0`. We need to do this because `cruft` will compare your repo with the template, and will only update the files that have changed since the commit hash you specify.
20 | 
21 | After you have entered the commit hash, `cruft` will create a `.cruft.json` file in your repo containing the URL to the cookiecutter, the commit hash, and your responses to cookiecutter's prompts. Please track the changes to this file with git.
22 | 
23 | ## Apply updates: `cruft diff` & `cruft update`
24 | Now that `.cruft.json` exists, we can run `cruft diff` to see the changes between your repo and the template. At this point, you could manually apply the subset of the changes you want.
25 | 
26 | You can also ask `cruft` to apply the changes for you by running `cruft update`. You will be prompted to confirm the changes with `y` before they are applied. You can also press `v` to see the changes before confirming.
27 | 
28 | :::{note}
29 | `cruft update` will not delete the files you have added to your repo since its creation. It only cares about the files that were generated by cookiecutter.
30 | 
31 | It also doesn't delete the files that have since been removed from the template. You have to do this manually.
32 | :::
33 | 
34 | In case `cruft update` is not able to apply all the changes, it will create a `your_file.rej` file for each conflicting file. Based on these files' contents you can then apply changes manually. Don't forget to remove the `.rej` files when you're done. This is a bit tedious, but it should get easier with more frequent updates.
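
As a rough sketch, a typical update session run from the root of your repository might look like this. Whether any `.rej` files appear depends on your project, and the `find` clean-up step assumes a Unix-like shell:

```bash
# check whether the template has changed since the recorded commit
cruft check

# preview the incoming changes, then apply them
cruft diff
cruft update

# list any conflicts that cruft could not apply automatically
find . -name "*.rej"

# once the conflicts are resolved by hand, delete the leftover .rej files
find . -name "*.rej" -delete
```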
35 | 
36 | When `cruft update` is done, you will notice that the `.cruft.json` file has been updated with the new commit hash of the template. Future updates will be based on this commit hash.
37 | 
38 | `cruft check` will check if your repo is up to date with the template and just give you a boolean response.
39 | 
40 | ## Commit
41 | Once you are satisfied, commit the changes to a new branch and open a PR to merge it into `main`.
42 | 
43 | Now, check if your GitHub actions fail... 🤦🏻‍♀️ If everything is fine, you can merge the PR. 🎉 If not, you will have to fix the errors and repeat the process. 😱
44 | 
--------------------------------------------------------------------------------
/docs/source/programming/Mount-ceph-ubuntu-temp.md:
--------------------------------------------------------------------------------
1 | (target-mount-ceph-ubuntu-temp)=
2 | # Mount ceph storage on Ubuntu temporarily
3 | In this example, we will **temporarily** mount a partition of `ceph` on an Ubuntu machine. The mounting is temporary because it will only remain until the next reboot. To mount a partition of `ceph` permanently, see [Mount ceph storage on Ubuntu permanently](target-mount-ceph-ubuntu-perm).
4 | 
5 | 
6 | ## Prerequisites
7 | - Administrator rights (`sudo`) on the local Ubuntu machine
8 | - `cifs-utils` installed via `sudo apt-get install cifs-utils`
9 | 
10 | 
11 | ## Steps
12 | ### 1. Create a mount point
13 | Create a directory to mount the storage to. Sensible places to do this on Ubuntu would be in `/mnt` or `/media`. In the example below, we will use `/mnt/ceph-neuroinformatics`.
14 | 
15 | ```bash
16 | sudo mkdir /mnt/ceph-neuroinformatics
17 | ```
18 | 
19 | ### 2. Mount the `ceph` partition
20 | To mount the desired partition on the directory we just created, run the `mount` command with the appropriate options. In our example, this would be:
21 | ```bash
22 | sudo mount -t cifs -o user=,domain=AD.SWC.UCL.AC.UK //ceph-gw02.hpc.swc.ucl.ac.uk/neuroinformatics /mnt/ceph-neuroinformatics
23 | ```
24 | :::{note}
25 | You can check the full set of options for the `mount` command by running `mount --help`
26 | :::
27 | 
28 | Make sure to replace the following for your particular case:
29 | - `//ceph-gw02.hpc.swc.ucl.ac.uk/neuroinformatics` with the path to your desired partition, e.g. `//ceph-gw02.hpc.swc.ucl.ac.uk/`
30 | - `/mnt/ceph-neuroinformatics` with the path to the local mount point you created in the previous step.
31 | - `` with your SWC username
32 | 
33 | If the command is executed without errors you will be prompted for your SWC password.
34 | 
35 | ### 3. Check the partition is mounted correctly
36 | It should show up in the list of mount points when running `df -h`
37 | 
38 | :::{note}
39 | The command `df` prints out information about the file system
40 | :::
41 | 
42 | ### 4. To unmount the storage
43 | Run the following command
44 | ```bash
45 | sudo umount /mnt/ceph-neuroinformatics
46 | ```
47 | Remember that because the mounting is temporary, the `ceph` partition will be unmounted upon rebooting our machine.
48 | 
49 | You can check that the partition is correctly unmounted by running `df -h` and verifying it does not show in the output.
50 | 
51 | :::{warning}
52 | Do not use `sudo rm -r /mnt/ceph-neuroinformatics` to unmount, even though it looks like a local folder. This will delete files on `ceph`!
53 | ::: 54 | 55 | ## References 56 | - [Mount network drive under Linux temporarily/permanently](https://www.rz.uni-kiel.de/en/hints-howtos/connecting-a-network-share/Linux/through-temporary-permanent-mounting/mount-network-drive-under-linux-temporarily-permanently) 57 | - [How to Mount a SMB Share in Ubuntu](https://support.zadarastorage.com/hc/en-us/articles/213024986-How-to-Mount-a-SMB-Share-in-Ubuntu) 58 | -------------------------------------------------------------------------------- /docs/source/programming/Mount-ceph-ubuntu.md: -------------------------------------------------------------------------------- 1 | (target-mount-ceph-ubuntu-perm)= 2 | # Mount ceph storage on Ubuntu permanently 3 | In this example, we will **permanently** mount the `neuroinformatics` partition of `ceph`. The same procedure can be used to mount other partitions, such as those belonging to particular labs or projects. 4 | 5 | To mount a partition of `ceph` only temporarily, see [Mount ceph storage on Ubuntu temporarily](target-mount-ceph-ubuntu-temp). 6 | 7 | 8 | The following instructions have been tested on **Ubuntu 20.04 LTS**. 9 | 10 | ## Prerequisites 11 | - Administrator rights (`sudo`) on the local Ubuntu machine 12 | - `cifs-utils` installed via `sudo apt-get install cifs-utils` 13 | 14 | ## Steps 15 | ### 1. Store your SWC credentials 16 | First, create a file to store your SWC login credentials and save it in your home directory. 17 | You can do that on the terminal via `nano ~/.smb_swc`. 18 | 19 | The ``.smb_swc`` file contents should look like this: 20 | ```bash 21 | user= 22 | password= 23 | domain=AD.SWC.UCL.AC.UK 24 | ``` 25 | Save the file and exit. 26 | 27 | :::{warning} 28 | Storing the password in plain-text format as done above constitutes a security risk. 29 | If someone gets access to your machine, they will be able to see your SWC password. 30 | 31 | You can harden the security a bit by changing the permissions of the file. Run the following command: 32 | 33 | ```bash 34 | chmod 600 ~/.smb_swc 35 | ``` 36 | This will ensure the file is readable and writable only by the owner (you). 37 | However, if someone gets access to your machine with your user logged in, they will still be able to read the SWC password. 38 | ::: 39 | 40 | ### 2. Create a mount point 41 | Create a directory to mount the storage to. Sensible places to do this on Ubuntu would be in `/mnt` or `/media`. In the example below, we will use `/media/ceph-neuroinformatics`. 42 | 43 | ```bash 44 | sudo mkdir /media/ceph-neuroinformatics 45 | ``` 46 | 47 | ### 3. Edit the fstab file 48 | This will allow you to mount the storage automatically on boot. Before editing the file, make a backup of it, just in case. 49 | ```bash 50 | sudo cp /etc/fstab /etc/fstab.bak 51 | ``` 52 | Now, open the file with your favourite editor (e.g. `sudo nano /etc/fstab`) and add new lines at the end of the file. 53 | 54 | For example: 55 | ```bash 56 | # Mount ceph/neuroinformatics 57 | //ceph-gw02.hpc.swc.ucl.ac.uk/neuroinformatics /media/ceph-neuroinformatics cifs uid=1002,gid=1002,credentials=/home//.smb_swc 0 0 58 | ``` 59 | Make sure to replace the following: 60 | - `//ceph-gw02.hpc.swc.ucl.ac.uk/neuroinformatics` with the path to your desired partition, e.g. `//ceph-gw02.hpc.swc.ucl.ac.uk/` 61 | - `/media/ceph-neuroinformatics` with the path to the local mount point you created in the previous step. 62 | - `uid=1002,gid=1002` with your local user ID and group ID. You can find these out by running `id` on the terminal. 
63 | - `` with your username on the local Ubuntu machine on which you are mounting the storage 64 | 65 | Save the file and exit. 66 | 67 | ### 4. Mount the storage 68 | You can now mount the storage by running `sudo mount -a`. If you get an error, check the `fstab` file for typos. If you get no error, you should be able to see the mounted storage by running `df -h`. 69 | 70 | The next time you reboot your machine, the storage should be mounted automatically. 71 | 72 | ### 5. Unmount the storage 73 | To unmount the storage, run `sudo umount /media/ceph-neuroinformatics` (or whatever your mount point is). 74 | 75 | To permanently unmount the storage, remove the lines you added to the `fstab` file and run `sudo mount -a` again. 76 | 77 | :::{warning} 78 | Do not trust your instincts and use `sudo rm -r /media/ceph-neuroinformatics` to permanently unmount, even though it looks like a local folder. This will delete files on `ceph`! 79 | ::: 80 | -------------------------------------------------------------------------------- /docs/source/programming/SLURM-arguments.md: -------------------------------------------------------------------------------- 1 | (slurm-arguments-target)= 2 | # SLURM arguments primer 3 | 4 | ```{include} ../_static/swc-wiki-warning.md 5 | ``` 6 | 7 | ## Abbreviations 8 | | Acronym | Meaning | 9 | | --------------------------------------------------------------- | -------------------------------------------- | 10 | | [SLURM](https://slurm.schedmd.com/) | Simple Linux Utility for Resource Management | 11 | | [SWC](https://www.sainsburywellcome.org/web/) | Sainsbury Wellcome Centre | 12 | | [HPC](https://en.wikipedia.org/wiki/High-performance_computing) | High Performance Computing | 13 | | [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface) | Message Passing Interface | 14 | 15 | 16 | ## Overview 17 | SLURM is a job scheduler and resource manager used on the SWC HPC cluster. 18 | It is responsible for allocating resources (e.g. CPU cores, GPUs, memory) to jobs submitted by users. 19 | When submitting a job to SLURM, you can specify various arguments to request the resources you need. 20 | These are called SLURM directives, and they are passed to SLURM via the `sbatch` or `srun` commands. 21 | 22 | These are often specified at the top of a SLURM job script, 23 | e.g. the lines that start with `#SBATCH` in the following example: 24 | 25 | ```{code-block} bash 26 | #!/bin/bash 27 | 28 | #SBATCH -J my_job # job name 29 | #SBATCH -p gpu # partition (queue) 30 | #SBATCH -N 1 # number of nodes 31 | #SBATCH --mem 16G # memory pool for all cores 32 | #SBATCH -n 4 # number of cores 33 | #SBATCH -t 0-06:00 # time (D-HH:MM) 34 | #SBATCH --gres gpu:1 # request 1 GPU (of any kind) 35 | #SBATCH -o slurm.%x.%N.%j.out # STDOUT 36 | #SBATCH -e slurm.%x.%N.%j.err # STDERR 37 | #SBATCH --mail-type=ALL 38 | #SBATCH --mail-user=user@domain.com 39 | #SBATCH --array=1-12%4 # job array index values 40 | 41 | # load modules 42 | ... 43 | 44 | # execute commands 45 | ... 46 | ``` 47 | This guide provides only a brief overview of the most important SLURM arguments, 48 | to demysify the above directives and help you get started with SLURM. 49 | For a more detailed description see the [SLURM documentation](https://slurm.schedmd.com/sbatch.html). 50 | 51 | ## Commonly used arguments 52 | 53 | ### Partition (Queue) 54 | - *Name:* `--partition` 55 | - *Alias:* `-p` 56 | - *Description:* Specifies the partition (or queue) to submit the job to. 
To see a list of all partitions/queues, the nodes they contain and their respective time limits, type `sinfo` when logged in to the HPC cluster.
57 | - *Example values:* `gpu`, `cpu`
58 | 
59 | ### Job Name
60 | - *Name:* `--job-name`
61 | - *Alias:* `-J`
62 | - *Description:* Specifies a name for the job, which will appear in various SLURM commands and logs, making it easier to identify the job (especially when you have multiple jobs queued up).
63 | - *Example values:* `training_run_24`
64 | 
65 | ### Number of Nodes
66 | - *Name:* `--nodes`
67 | - *Alias:* `-N`
68 | - *Description:* Defines the number of nodes required for the job.
69 | - *Example values:* `1`
70 | 
71 | :::{warning}
72 | This is usually `1` unless you're parallelising your code across multiple nodes with technologies such as MPI.
73 | :::
74 | 
75 | ### Number of Cores
76 | - *Name:* `--ntasks`
77 | - *Alias:* `-n`
78 | - *Description:* Defines the number of cores (or tasks) required for the job.
79 | - *Example values:* `1`, `5`, `20`
80 | 
81 | ### Memory Pool for All Cores
82 | - *Name:* `--mem`
83 | - *Description:* Specifies the total amount of memory (RAM) required for the job across all cores (per node).
84 | - *Example values:* `4G`, `32G`, `64G`
85 | 
86 | ### Time Limit
87 | - *Name:* `--time`
88 | - *Alias:* `-t`
89 | - *Description:* Sets the maximum time the job is allowed to run. The format is D-HH:MM, where D is days, HH is hours, and MM is minutes.
90 | - *Example values:* `0-01:00` (1 hour), `0-04:00` (4 hours), `1-00:00` (1 day).
91 | 
92 | :::{warning}
93 | If the job exceeds the time limit, it will be terminated by SLURM.
94 | On the other hand, avoid requesting way more time than what your job needs,
95 | as this may delay its scheduling (depending on resource availability).
96 | 
97 | If needed, the systems administrator can extend long-running jobs.
98 | :::
99 | 
100 | ### Generic Resources (GPUs)
101 | - *Name:* `--gres`
102 | - *Description:* Requests generic resources, such as GPUs.
103 | - *Example values:* `gpu:1`, `gpu:rtx2080:1`, `gpu:rtx5000:1`, `gpu:a100_2g.10gb:1`
104 | 
105 | :::{warning}
106 | No GPU will be allocated to you unless you specify it via the `--gres` argument (even if you are on the 'gpu' partition).
107 | To request 1 GPU of any kind, use `--gres gpu:1`. To request a specific GPU type, you have to include its name, e.g. `--gres gpu:rtx2080:1`.
108 | You can view the available GPU types on the [SWC internal wiki](https://wiki.ucl.ac.uk/display/SSC/CPU+and+GPU+Platform+architecture).
109 | :::
110 | 
111 | ### Standard Output File
112 | - *Name:* `--output`
113 | - *Alias:* `-o`
114 | - *Description:* Defines the file where the standard output (STDOUT) will be written. In the example script above, it's set to `slurm.%x.%N.%j.out`, where `%x` is the job name, `%N` is the node name and `%j` is the job ID.
115 | - *Example values:* `slurm.%x.%N.%j.out`, `slurm.MyAwesomeJob.out`
116 | 
117 | :::{note}
118 | This file contains the output of the commands executed by the job (i.e. the messages that normally get printed on the terminal).
119 | :::
120 | 
121 | ### Standard Error File
122 | - *Name:* `--error`
123 | - *Alias:* `-e`
124 | - *Description:* Specifies the file where the standard error (STDERR) will be written. In the example script above, it's set to `slurm.%x.%N.%j.err`, where `%x` is the job name, `%N` is the node name and `%j` is the job ID.
125 | - *Example values:* `slurm.%N.%j.err`, `slurm.MyAwesomeJob.err` 126 | 127 | :::{note} 128 | This file is very useful for debugging, as it contains all the error messages produced by the commands executed by the job. 129 | ::: 130 | 131 | ### Email Notifications 132 | - *Name:* `--mail-type` 133 | - *Description:* Defines the conditions under which the user will be notified by email. 134 | - *Example values:* `ALL`, `BEGIN`, `END`, `FAIL` 135 | 136 | ### Email Address 137 | - *Name:* `--mail-user` 138 | - *Description:* Specifies the email address to which notifications will be sent. 139 | - *Example values:* `user@domain.com` 140 | 141 | ### Array jobs 142 | - *Name:* `--array` 143 | - *Description:* Job array index values (a list of integers in increasing order). The task index can be accessed via the `SLURM_ARRAY_TASK_ID` environment variable. 144 | - *Example values:* `--array=1-10` (10 jobs), `--array=1-100%5` (100 jobs, but only 5 of them will be allowed to run in parallel at any given time). 145 | 146 | :::{warning} 147 | If an array consists of many jobs, using the `%` syntax to limit the maximum number of parallel jobs is recommended to prevent overloading the cluster. 148 | ::: 149 | -------------------------------------------------------------------------------- /docs/source/programming/SSH-SWC-cluster.md: -------------------------------------------------------------------------------- 1 | (ssh-cluster-target)= 2 | # Set up SSH for the SWC HPC cluster 3 | 4 | This guide explains how to connect to the SWC's HPC cluster via SSH from 5 | any personal computer. 6 | 7 | If you have access to a desktop managed by the SWC's IT team 8 | the connection is much more straightforward than described here 9 | (see the [note on managed desktops](ssh-managed-target)). 10 | 11 | ```{include} ../_static/swc-wiki-warning.md 12 | ``` 13 | 14 | ```{include} ../_static/code-blocks-note.md 15 | ``` 16 | 17 | ## Abbreviations 18 | | Acronym | Meaning | 19 | | ----------------------------------------------------------------------- | -------------------------------------------- | 20 | | [SSH](https://en.wikipedia.org/wiki/Secure_Shell) | Secure (Socket) Shell protocol | 21 | | [SWC](https://www.sainsburywellcome.org/web/) | Sainsbury Wellcome Centre | 22 | | [HPC](https://en.wikipedia.org/wiki/High-performance_computing) | High Performance Computing | 23 | | [IT](https://en.wikipedia.org/wiki/Information_technology) | Information Technology | 24 | | [SLURM](https://slurm.schedmd.com/) | Simple Linux Utility for Resource Management | 25 | | [IDE](https://en.wikipedia.org/wiki/Integrated_development_environment) | Integrated Development Environment | 26 | | [GUI](https://en.wikipedia.org/wiki/Graphical_user_interface) | Graphical User Interface | 27 | 28 | ## Prerequisites 29 | - You have an SWC account and know your username and password. 30 | - You have read the [SWC wiki's section on High Performance Computing (HPC)](https://wiki.ucl.ac.uk/display/SSC/High+Performance+Computing), especially the [Logging into the Cluster page](https://wiki.ucl.ac.uk/display/SSC/Logging+into+the+Cluster). 31 | - You know the basics of using the command line, i.e. using the terminal to navigate the file system and run commands. 32 | - You have an SSH client installed on your computer. This is usually pre-installed on Linux and macOS. SSH is also available on Windows (since Windows 10), however some steps will differ. If you are a Windows user, read the note below before proceeding. 
33 | 34 | ::: {dropdown} Note for Windows users 35 | :color: info 36 | :icon: info 37 | 38 | You have two options on how to proceed: 39 | 40 | 1. Install [Git Bash](https://gitforwindows.org/), which emulates a Linux terminal on Windows and includes tools that are not available on Windows by default, such as `nano`, and `ssh-copy-id`. This is the recommended option, as it will allow you to follow along with all commands in this guide, as they are presented. Just assume that all commands are run in Git Bash. 41 | 42 | 2. If you are using Windows 10 or newer, you can follow this guide (except for the section on [SSH keys](#ssh-keys)) using native Windows functionalities as described here. 43 | 44 | 45 | To [Log into the cluster](#log-into-the-cluster), you can use the same commands as in the guide below, but typed in the Windows `cmd`: 46 | ```{code-block} console 47 | :caption: cmd 48 | ssh @ssh.swc.ucl.ac.uk 49 | ssh hpc-gw2 50 | ``` 51 | 52 | The [SSH config file](#ssh-config-file) section can be followed using the file browser and Notepad, instead of the terminal and `nano`. 53 | Create the `.ssh` folder in you home directory, i.e. `C:\Users\\.ssh`, 54 | if it does not already exist (don't forget the `.` at the start of `.ssh`). 55 | 56 | You may create and edit the `config` file with Notepad but beware that the file must not have an extension. 57 | To create a file without an extension in Windows, you need to make the file extensions visible 58 | (click 'View' in the file browser and check the box 'File name extensions'). 59 | The `config` file contents should be the same as in the guide below. 60 | 61 | In day-to-day use, you can use the `ssh swc-gateway` and `ssh swc-bastion` commands natively in Windows `cmd`, 62 | provided that you have defined those aliases in your `config` file, as this guide describes. 63 | ::: 64 | 65 | ## Log into the cluster 66 | Run the following commands on the terminal, typing your `` both times when prompted 67 | (your password will not be displayed on the screen): 68 | 69 | ```{code-block} console 70 | $ ssh @ssh.swc.ucl.ac.uk 71 | $ ssh hpc-gw2 72 | ``` 73 | You have now successfully logged into the cluster 🎉. You may stop reading here, but... 74 | 75 | ::: {note} 76 | If you want to learn more about why we had to SSH twice, read the [next section](#why-do-we-ssh-twice). 77 | 78 | If you want to make you life easier, you can set yourself up with an [SSH config file](#ssh-config-file) 79 | and some [SSH keys](#ssh-keys). 80 | ::: 81 | 82 | ## Why do we SSH twice? 83 | We first need to distinguish the different types of nodes on the SWC HPC system: 84 | 85 | - the *bastion* node (or *login node*) - `ssh.swc.ucl.ac.uk`. This serves as a single entry point to the cluster from external networks. By funneling all external SSH connections through this node, it's easier to monitor, log, and control access, reducing the attack surface. The *bastion* node has very little processing power. It can be used to submit and monitor SLURM jobs, but it shouldn't be used for anything else. 86 | - the *gateway* node - `hpc-gw2`. This is a more powerful machine and can be used for light processing, such as editing your scripts, creating and copying files etc. However don't use it for anything computationally intensive, since this node's resources are shared across all users. 87 | - the *compute* nodes - `enc1-node10`, `gpu-sr670-21`, etc. These are the machinces that actually run the jobs we submit, either interactively via `srun` or via batch scripts submitted with `sbatch`. 
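
To see this in practice, you can run `hostname` after each hop to confirm which node you are on. Replace `<username>` with your SWC username; the exact hostnames printed may differ from the illustrative comments below:

```{code-block} console
$ ssh <username>@ssh.swc.ucl.ac.uk
$ hostname   # you are now on the bastion (login) node
$ ssh hpc-gw2
$ hostname   # you are now on the gateway node, e.g. hpc-gw2
```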
88 | 
89 | ![](../_static/ssh_flowchart_unmanaged.png)
90 | 
91 | Your home directory, as well as the locations where filesystems like `ceph` are mounted, are shared across all of the nodes.
92 | 
93 | The first `ssh` command - `ssh @ssh.swc.ucl.ac.uk` - only takes you to the *bastion* node. A second command - `ssh hpc-gw2` - is needed to reach the *gateway* node.
94 | 
95 | Similarly, if you are on the *gateway* node, typing `logout` once will only get you one layer out, to the *bastion* node. You need to type `logout` again to exit the *bastion* node and return to your local machine.
96 | 
97 | The *compute* nodes should only be accessed via the SLURM `srun` or `sbatch` commands. This can be done from either the *bastion* or the *gateway* nodes. If you are running an interactive job on one of the *compute* nodes, you can terminate it by typing `exit`. This will return you to the node from which you entered.
98 | 
99 | :::{dropdown} Be mindful of node usage
100 | :color: warning
101 | :icon: alert
102 | 
103 | Avoid running heavy computations on the *bastion* or *gateway* nodes, as
104 | they are meant for light tasks like text editing or job submissions to SLURM.
105 | 
106 | For quick tasks that may burden these nodes,
107 | request an interactive session on a *compute* node using the `srun` command.
108 | Here's an example for creating a new conda environment:
109 | 
110 | ```{code-block} console
111 | $ srun -p cpu -n 4 --mem 8G --pty bash -i
112 | $ module load miniconda
113 | $ conda create -n myenv python=3.10
114 | ```
115 | 
116 | The first command requests 4 cores and 8GB of memory on a node of the `cpu`
117 | partition, meant for jobs that do not require GPUs.
118 | Depending on your needs and node availability, you may need to request
119 | a different partition. See the [SLURM arguments primer](slurm-arguments-target)
120 | for more information.
121 | 
122 | The `--pty bash -i` part specifies
123 | an interactive bash shell. The following two commands are run in this shell,
124 | on the assigned *compute* node.
125 | 
126 | Type `exit` to leave the interactive session when finished.
127 | Avoid keeping sessions open when not in use.
128 | :::
129 | 
130 | (ssh-managed-target)=
131 | ## Note on managed desktops
132 | 
133 | The SWC's IT team offers managed desktop computers equipped with either
134 | a Windows or a Linux image. These machines are already part of the SWC's
135 | trusted network domain, meaning you can access the HPC cluster without
136 | having to go through the *bastion* node.
137 | 
138 | - If you are using a [managed Windows desktop](https://wiki.ucl.ac.uk/display/SSC/SWC+Desktops),
139 | you can SSH directly into the *gateway* node with `ssh hpc-gw2` from the
140 | Windows `cmd` or PowerShell.
141 | You may use that node to prepare your scripts and submit SLURM jobs.
142 | - If you are using a [managed Linux desktop](https://wiki.ucl.ac.uk/display/SSC/Managed+Linux+Desktop),
143 | you can even bypass the *gateway* node. In fact, you may directly submit SLURM jobs
144 | from your terminal, without having to SSH at all. That's because managed Linux desktops
145 | use the same platform as the HPC nodes
146 | and are already equipped with the SLURM job scheduler.
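
For instance, from a terminal on a managed Linux desktop you should be able to talk to the scheduler directly, with no `ssh` step at all; the script name below is a hypothetical placeholder:

```{code-block} console
$ sbatch my_job_script.sh                    # submit a batch job
$ squeue -u $USER                            # check your queued and running jobs
$ srun -p cpu -n 1 --mem 4G --pty bash -i    # or request an interactive session
```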
147 | 148 | A modified version of the flowchart found above, including managed desktops: 149 | 150 | ![](../_static/ssh_flowchart_full.png) 151 | 152 | 153 | ## SSH config file 154 | If you are frequently accessing the cluster from an unmanaged machine, 155 | you may find yourself typing the same SSH commands over and over again. 156 | You can make your life easier by editing the SSH config file. 157 | This is a text file that lives in your home directory and contains 158 | a list of aliases for SSH connections. 159 | 160 | On your local PC/Laptop, navigate to the `.ssh` folder in your user's home `~` directory: 161 | ```{code-block} console 162 | $ cd ~/.ssh 163 | ``` 164 | 165 | List the files in this directory: 166 | ```{code-block} console 167 | $ ls -1 168 | authorized_keys 169 | config 170 | known_hosts 171 | ``` 172 | Some of these files may not exist yet. Next we will open the `config` file 173 | using the terminal text editor `nano`: 174 | ```{code-block} console 175 | $ nano config 176 | ``` 177 | If the file doesn't exist yet, it will be created. 178 | Add the following lines to the file: 179 | 180 | ```{code-block} bash 181 | :caption: config 182 | :name: config-content 183 | 184 | # Specify our intermediate jump host, aka the bastion node 185 | Host swc-bastion 186 | HostName ssh.swc.ucl.ac.uk 187 | User 188 | 189 | # Specify how to get to the gateway node by jumping through the bastion node 190 | # The gateway hostname is specified as the jump-host would see it 191 | Host swc-gateway 192 | HostName hpc-gw2 193 | User 194 | ProxyJump swc-bastion 195 | ``` 196 | 197 | Save the file by pressing `Ctrl+O`, then `Enter`. 198 | Exit the `nano` editor by pressing `Ctrl+X`. 199 | 200 | From now on, you can directly SSH into the *gateway* node by typing: 201 | ```{code-block} console 202 | $ ssh swc-gateway 203 | ``` 204 | You can also use the same syntax to SSH into the *bastion* node: 205 | ```{code-block} console 206 | $ ssh swc-bastion 207 | ``` 208 | In both cases, typing the `logout` command once will return you to your local machine. 209 | 210 | ## SSH keys 211 | If you are bored of typing your password every time you SSH into the cluster, 212 | you can set up authentication via SSH keys. You will have to do some work 213 | upfront, but it will save you tons of time in the long run. Plus, it's more secure. 214 | 215 | ::: {dropdown} How does SSH key authentication work? 216 | :color: info 217 | :icon: info 218 | You generate a pair of keys locally - a public and a private one - 219 | and then copy the public key to the remote machine. 220 | When you try to SSH into the remote machine, the SSH client on your local machine 221 | will use the private key to generate a signature, which the SSH server on the 222 | remote machine will verify using the public key. If the signature is valid, 223 | you will be granted access. 224 | 225 | There are several cryptographic algorithms that can be used to generate the keys. 226 | They can be selected using the `-t` argument of the `ssh-keygen` command. 227 | In the following example, we use `ed25519`, as it strikes a good balance between 228 | security and speed for most use cases. 229 | ::: 230 | 231 | To generate a pair of SSH keys, run the following command on your local machine: 232 | ```{code-block} console 233 | $ ssh-keygen -t ed25519 234 | ``` 235 | 236 | You will be prompted to enter a file path for the key. You may accept the 237 | default - `~/.ssh/id_ed25519` - or choose another path/name. 
238 | 239 | Next, you will be prompted to enter a passphrase. 240 | This is an extra layer of security, but you can leave it blank if you want. 241 | 242 | There are now two new files in the `.ssh` directory: 243 | ```{code-block} console 244 | :emphasize-lines: 5,6 245 | $ cd ~/.ssh 246 | $ ls -1 247 | authorized_keys 248 | config 249 | id_ed25519 250 | id_ed25519.pub 251 | known_hosts 252 | ``` 253 | The `id_ed25519` file is your private key and **it should never be shared with anyone**. 254 | 255 | The `id_ed25519.pub` file is your public key. 256 | 257 | ::: {warning} 258 | In most cases, you don't need to explicitly specify the location of the private key 259 | in your `~/.ssh/config` file because SSH will automatically look for the default key names 260 | (like `id_rsa`, `id_ed25519`, etc.) in the `~/.ssh` directory. 261 | 262 | However, if you're using a non-default name or location for your private key, 263 | or if you have multiple keys and want to specify which one to use for a particular host, 264 | then you can add the `IdentityFile` directive in your `~/.ssh/config` to 265 | point to the private key. 266 | 267 | For example, if you have a private key with a custom name `` 268 | in the `~/.ssh` directory, you can add the following lines to your `~/.ssh/config` file: 269 | 270 | 271 | ```{code-block} bash 272 | :caption: config 273 | :emphasize-lines: 5,13 274 | 275 | # Specify our intermediate jump host, aka the bastion node 276 | Host swc-bastion 277 | HostName ssh.swc.ucl.ac.uk 278 | User 279 | IdentityFile ~/.ssh/ 280 | 281 | # Specify how to get to the gateway node by jumping through the bastion node 282 | # The gateway hostname is specified as the jump-host would see it 283 | Host swc-gateway 284 | HostName hpc-gw2 285 | User 286 | ProxyJump swc-bastion 287 | IdentityFile ~/.ssh/ 288 | ``` 289 | ::: 290 | 291 | Next, let's copy the public key you just generated to the remote machines. 292 | 293 | ```{code-block} console 294 | $ ssh-copy-id -i id_ed25519.pub swc-gateway 295 | ``` 296 | 297 | ::: {dropdown} Explain the above command 298 | :color: info 299 | :icon: info 300 | The `ssh-copy-id` command uses the configuration we previously set up 301 | in the `config` file to figure out how to reach the remote machine. 302 | 303 | It copies the specified public key to your home directory on the target machine (in this case `swc-gateway`) and adds it to the `.ssh/authorized_keys` file there. 304 | 305 | Since your SWC home directory is shared across all HPC nodes, the public 306 | key will be available on all of them. That's why you only need to run the above command once, with either `swc-bastion` or `swc-gateway` as the target. 307 | ::: 308 | 309 | 310 | 🎉 Congrats! 
You can now directly SSH into the *gateway* node without typing your password:
311 | ```{code-block} console
312 | $ ssh swc-gateway
313 | ```
314 | In case you want to SSH into the *bastion* node, you can do so by typing:
315 | ```{code-block} console
316 | $ ssh swc-bastion
317 | ```
318 | 
--------------------------------------------------------------------------------
/docs/source/programming/SSH-vscode.md:
--------------------------------------------------------------------------------
1 | # SSH into an unmanaged machine using VSCode
2 | > **_Example use case:_** When working from home, connect from your local Mac to the SWC office Linux machine
3 | 
4 | On your local machine, `cd` to your home directory, open `.ssh/config` and append the following configuration:
5 | ```
6 | Host *
7 | ServerAliveInterval 60
8 | 
9 | Host jump-host
10 | User swcUserID
11 | HostName ssh.swc.ucl.ac.uk
12 | 
13 | Host remote-host
14 | User remoteMachineUsername
15 | HostName 172.24.243.000
16 | ProxyJump jump-host
17 | ```
18 | 
19 | Make sure to replace `172.24.243.000` with the IP address of your remote machine.
20 | On Ubuntu, you can find the IP address in this way:
21 | * Go to `Settings` then `Network`
22 | * Click on the cogwheel next to your connection (usually `Wired`)
23 | * The `IPv4` address is the one you are looking for
24 | 
25 | If you do not have a config file in your `.ssh` folder, create one:
26 | ```bash
27 | cd .ssh/
28 | touch config
29 | ```
30 | 
31 | Connect to the VPN, then use the `Open a remote window` (Remote - SSH extension) tool of VSCode and connect to `remote-host`. You will be asked for your SWC and Linux passwords.
32 | 
--------------------------------------------------------------------------------
/docs/source/programming/Troubleshooting.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting
2 | Small problems and their solutions.
3 | 
4 | ## Ubuntu distribution update error
5 | This is an error that appeared when updating to a new Ubuntu distribution.
6 | Error msg: Software Updater - Not all updates can be installed.
7 | Solution: `sudo apt-get dist-upgrade`
8 | 
9 | ## Terminal is not opening after installing a new Python version
10 | If you installed a new Python version without the use of `conda`, there might be a mismatch in the Python naming in a bin file.
11 | If possible, open a terminal through VSCode, or open a virtual terminal (VT) with CTRL + ALT + F3 and run `gnome-terminal`.
12 | Does it throw a Python error? If yes, run `sudo nano /usr/bin/gnome-terminal` and change `#!/usr/bin/python3` to `#!/usr/bin/python3.10` if the version you're currently using is 3.10.
13 | Exit the VT via CTRL + ALT + F2.
14 | 
15 | ## Detach a forked repo on GitHub
16 | If you forked a repo and want to detach it from the original repo, use the [Github chatbot-virtual-assistant](https://support.github.com/contact?tags=rr-forks&subject=Detach%20Fork&flow=detach_fork).
17 | Follow the instructions and wait for the response.
18 | 
--------------------------------------------------------------------------------
/docs/source/programming/index.md:
--------------------------------------------------------------------------------
1 | # Programming
2 | 
3 | Guides related to general coding and software development issues, such as version control, programming environments, IDEs, Linux, the command line, etc.
4 | 
5 | Small tips and tricks that do not warrant a long-form guide can be found in the [Troubleshooting](Troubleshooting.md) section.
6 | 7 | ```{toctree} 8 | :maxdepth: 1 9 | 10 | SLURM-arguments 11 | SSH-SWC-cluster 12 | SSH-vscode 13 | vscode-with-slurm-job 14 | Mount-ceph-ubuntu 15 | Mount-ceph-ubuntu-temp 16 | Cookiecutter-cruft 17 | Troubleshooting 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/source/programming/vscode-with-slurm-job.md: -------------------------------------------------------------------------------- 1 | # Using VSCode with interactive SLURM jobs on the SWC HPC cluster 2 | 3 | This guide explains how to set up and use VSCode in a SLURM interactive job on the SWC HPC cluster, offering a solution for users who require fast access to shared storage or substantial computational resources. 4 | 5 | This solution is easy to set up and constrained by the SLURM job's resource limits, as long as those limits are applied. This ensures that users can work efficiently within the allocated resources, preventing system overload. While the constraints depend on SLURM's enforcement (e.g., memory or time limits), users should remain mindful of their resource requests to avoid program terminations. 6 | 7 | Furthermore, VSCode's code tunnel automatically forwards any HTTP servers launched from the compute node, such as Dash-Plotly apps or Jupyter Notebook servers, offering seamless integration. 8 | 9 | 10 | ## Instructions 11 | 12 | First, open a terminal (not the VSCode terminal) and connect to the gateway node by running: 13 | 14 | ```{code-block} console 15 | $ ssh @ssh.swc.ucl.ac.uk 16 | $ ssh hpc-gw2 17 | ``` 18 | 19 | Once connected, request an interactive job via SLURM to access a compute node. For example: 20 | 21 | ```{code-block} console 22 | $ srun -p cpu -n 4 --mem 8G --pty bash -i 23 | ``` 24 | 25 | In this example, `-p cpu` requests the "cpu" partition, with default time settings, though you may adjust this according to your needs. 26 | You can also try other partitions depending on your needs and node availability. 27 | For more information, see the [SLURM arguments primer](slurm-arguments-target). 28 | 29 | After connecting to a compute node, initiate [VSCode Remote Tunnel](https://code.visualstudio.com/docs/remote/tunnels) by typing: 30 | 31 | ```{code-block} console 32 | $ code tunnel 33 | ``` 34 | 35 | A URL (`https://github.com/login/device`) and a PIN code will appear in the terminal. 36 | Follow this link, log in with your GitHub credentials, and enter the provided PIN to authorize access. 37 | 38 | You have two options to run VSCode: 39 | 40 | - **Run VSCode in the browser:** 41 | After completing the above step, a second link will appear in the terminal (e.g., `https://vscode.dev/tunnel/`), which you can follow to launch a VSCode browser-based session running directly on the HPC compute node. If you sign in to your VSCode account and have account syncing enabled, you will have your extensions and settings available. 42 | 43 | - **Run VSCode on your local machine:** 44 | If you want instead to use your local VSCode, install the "Remote - Tunnels" extension, click on "Open remote window" in the bottom left corner of the VSCode window, and select "Connect to Tunnel". You should see the node name in the list of available tunnels. Click on it to connect to the VSCode session running on the HPC compute node. 45 | 46 | :::{note} 47 | The name associated with the tunnel may not match the node name assigned by SLURM. E.g., the assigned compute node may appear as `gpu-380-11` in SLURM, but the corresponding tunnel may be named `gpu-350-02` in VSCode. 
When using VSCode via the browser, the tunnel name is shown at the end of the URL (e.g., `https://vscode.dev/tunnel/`). 48 | ::: 49 | 50 | If by mistake you close your terminal window, the tunnel will continue to run until you reach the time limit. To rejoin the SLURM job, you can use the following command if you know the job ID: 51 | 52 | ```{code-block} console 53 | $ sattach .0 54 | ``` 55 | 56 | When you’re finished, simply exit the SLURM session to close the VSCode tunnel and release the assigned resources. 57 | 58 | ::: {dropdown} Why do I have to authenticate via GitHub? 59 | :color: info 60 | :icon: info 61 | 62 | As explained in [VSCode docs](https://code.visualstudio.com/docs/remote/tunnels) it serves as a secure way to authenticate the user and ensure that only the user who initiated the tunnel can access it: 63 | > Tunneling securely transmits data from one network to another via [Microsoft dev tunnels](https://learn.microsoft.com/azure/developer/dev-tunnels/overview). 64 | > 65 | > Both hosting and connecting to a tunnel requires authentication with the same Github or Microsoft account on each end. In both cases, VS Code will make outbound connections to a service hosted in Azure; no firewall changes are generally necessary, and VS Code doesn't set up any network listeners. 66 | > 67 | >Once you connect from a remote VS Code instance, an SSH connection is created over the tunnel in order to provide end-to-end encryption. 68 | ::: 69 | 70 | ## Additional benefits of code tunnel 71 | 72 | One advantage of using VSCode's code tunnel is that it forwards any HTTP servers launched from the same node, such as Dash-Plotly apps or Jupyter Notebook servers. To launch your additional server, request a separate slurm job for the same compute node, e.g.: 73 | 74 | ```{code-block} console 75 | $ srun -p cpu -w -n 4 --mem 8G --pty bash -i 76 | ``` 77 | When these are initiated, VSCode will notify you with a link that you can follow to access the server's UI directly. 78 | --------------------------------------------------------------------------------