├── .gitignore
├── CHANGELOG.md
├── FURTHER_RESOURCES.md
├── Good_Scientific_Code_logo.png
├── LICENSE.md
├── README.md
├── block0_intro
    └── block0_slides.pdf
├── block1_git
    └── block1_slides.pdf
├── block2_clearcode
    ├── badnames.jl
    ├── badnames_improved.jl
    ├── block2_slides.pdf
    ├── eratosthenis_sieve.jl
    ├── eratosthenis_sieve.py
    ├── eratosthenis_sieve_julian.jl
    ├── eratosthenis_sieve_pythonic.py
    ├── eratosthenis_sieve_solved.jl
    └── snippets.jl
├── block3_softwaredev
    ├── block3_slides.pdf
    ├── plot_timeseries_functions.py
    ├── plot_timeseries_monolithic.jl
    ├── plot_timeseries_monolithic.py
    ├── plot_timeseries_monolithic_script.jl
    ├── plot_timeseries_monolithic_source.jl
    ├── plot_timeseries_numpy.py
    ├── plot_timeseries_xarray.py
    ├── running_mean.py
    ├── temporal_means_generalized.jl
    ├── temporal_means_generalized_tests.jl
    └── temporal_means_monthly_specific.jl
├── block4_github
    └── block4_slides.pdf
├── block5_documentation
    ├── Documenter_GitHub_deploy.yaml
    ├── block5_slides.pdf
    ├── block5_slides_juliacon2024.pdf
    ├── documentation_workshop_description.md
    ├── markdown_example.md
    ├── running_mean_documented.py
    └── temporal_means_generalized_docstrings.jl
└── block6_sciencereprod
    └── block6_slides.pdf


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pptx
2 | *.docx
3 | further_reading


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | Changelog for this Good Scientific Code Workshop is kept with respect to version v1 (the first published version).
 4 | 
 5 | ## 1.5
 6 | 
 7 | Some improvements in Block 6:
 8 | 
 9 | - Renamed to Open and Reproducible research
10 | - Solved the reproducibility vs replicability terminology with a dedicated slide
11 | - More emphasis on forgotten variables
12 | - Separated the block conceptually a bit more on reproducibility and openness
13 | - A couple of extra slides
14 | - Reliability of open source
15 | - Small victories for open source
16 | 
17 | ## 1.4
18 | 
19 | Many improvements in Block 3:
20 | 
21 | - Sequence of information and exercises completely re-done and improved drastically!
22 | - Added comments (instead of docstrings since we are before Block 5) to the Julia exercise solutions of temporal means
23 | - Importance of redesign
24 | - Several more slides on modular software + improved Agents.jl exposition and explanation
25 | - Big improvements in code clarity in Julia solved exercises
26 | - Extra slide on general tips
27 | - Global variables
28 | - More tips on unit tests
29 | - Several new memes!
30 | 
31 | 
32 | ## 1.3
33 | 
34 | Improvements on block 2 on clear code.
35 | Dedicated slides for shorter functions, for consistent naming, and other minor improvements.
36 | 
37 | ## 1.2
38 | 
39 | Drastically improved the Documentation block (block 5), in preparation for presenting it at JuliaCon 2024. The block is now more extensive with more useful information, many more examples and hyperlinks to external documentation, and many more exercises.
40 | 
41 | However, it focuses more on the Julia language as it was presented at JuliaCon2024.
42 | For Python users, one would need to **contribute** to this repository a couple of slides that
43 | 
44 | 1. Add examples from excellently documented Python packages
45 | 2. Add links to the Python go-to documentation builder (likely Sphinx)
46 | 3. Add premade GitHub CI files for building docs on GitHub CI.
47 | 
48 | As this material does not exist yet, the PDF for the _previous_ (v1) documentation block is kept in the repo as well, even though it has much less material.
49 | 
50 | We recommend that even Python users go through the new JuliaCon 2024 block instead of the older version, due to its higher quality and numerous examples.
51 | 
52 | ## 1.1
53 | 
54 | Greatly improved the content on scientific project reproducibility (block 6) after presenting it at the JuliaHEP conference.
55 | 
56 | 


--------------------------------------------------------------------------------
/FURTHER_RESOURCES.md:
--------------------------------------------------------------------------------
 1 | # Further resources
 2 | This file contains links to useful material for good scientific coding. Some were already used to develop the workshop, some were discovered later.
 3 | Feel free to add more resources here via a pull request!
 4 | 
 5 | ## Documentation
 6 | - [A beginner’s guide to docs](https://www.writethedocs.org/guide/writing/beginners-guide-to-docs/), by Eric Holscher (co-founder of readthedocs)
 7 | - [Unified Theory of Documentation](https://documentation.divio.com/) blogpost
 8 | - [Write The Docs](https://www.writethedocs.org/) community website
 9 | 
10 | ## Code writing and architecture
11 | - Clean Architecture - A Craftsman's Guide to Software Structure and Design, a textbook by Robert C Martin
12 | - Clean Code - A Handbook of Agile Software Craftsmanship, a textbook by Robert C Martin
13 | - [Single Responsibility Principle blog post](https://blog.cleancoder.com/uncle-bob/2014/05/08/SingleReponsibilityPrinciple.html), by Robert C Martin
14 | - [Clean Code Blog](https://blog.cleancoder.com/), by Robert C Martin
15 | 
16 | ## Reproducibility
17 | - [DrWatson paper](https://joss.theoj.org/papers/10.21105/joss.02673) and references therein
18 | - [A Guide to Reproducible Code](https://www.britishecologicalsociety.org/wp-content/uploads/2019/06/BES-Guide-Reproducible-Code-2019.pdf?utm_source=web&utm_medium=web&utm_campaign=better_science), by the British Ecological Society
19 | 
20 | ## Git and code collaboration
21 | - [Git information page on remotes](https://git-scm.com/book/en/v2/Git-Branching-Remote-Branches)
22 | - [Introduction to GitHub and Open-Source Projects](https://www.digitalocean.com/community/tutorial_series/an-introduction-to-open-source), by Digital Ocean
23 | - [How To Use Git: A Reference Guide](https://www.digitalocean.com/community/cheatsheets/how-to-use-git-a-reference-guide), by Digital Ocean
24 | - [How to contribute to open source](https://opensource.guide/how-to-contribute/), by GitHub
25 | 
26 | ## Full-range tutorials/workshops
27 | - [Code refinery lessons](https://coderefinery.org/lessons/)
28 | - [Software carpentry](https://software-carpentry.org/) organization website
29 | - [Short course on scientific computing and package development](https://www.youtube.com/watch?v=x4oi0IKf52w), by Tim Holy
30 | 


--------------------------------------------------------------------------------
/Good_Scientific_Code_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/Good_Scientific_Code_logo.png


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | The MIT License (MIT)
 3 | 
 4 | Copyright (c) 2022 George Datseris (https://github.com/Datseris/)
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Good Scientific Code Workshop
 2 | ![](Good_Scientific_Code_logo.png)
 3 | 
 4 | [![DOI](https://zenodo.org/badge/515197201.svg)](https://zenodo.org/badge/latestdoi/515197201)
 5 | 
 6 | **Table of Contents**
 7 | 1. [Elevator Pitch](#elevator-pitch)
 8 | 2. [Information](#information)
 9 | 3. [Contents](#contents)
10 | 4. [Bring your own code!](#bring-your-own-code)
11 | 5. [YouTube recording](#youtube-recording)
12 | 6. [Citing](#citing)
13 | 
14 | 
15 | ## Elevator Pitch
16 | 
17 | > _Scientific code is notorious for being hard to read and navigate, difficult to reproduce, and badly documented. One reason leading to this situation is that curricula that traditionally train scientists do not explicitly treat writing good code, and during the scientific life there is little time for the individual to practice this on their own. In this intensive block-based-workshop we will change that and teach you all you need to know to write code that is **Clear, Easy to understand, Well-documented, Reproducible, Testable, Reliable, Reusable, Extendable, and Generic.**_
18 | >
19 | > _Sounds too good to be true…? Join this workshop, and you will be surprised to find out that attributing all these properties to your code does not take much effort; once you have the proper education on the subject, you get all of this good stuff practically for free!_
20 | 
21 | ## Information
22 | 
23 | This repository contains the materials (presentation slides and exercises) for the "Good Scientific Code" workshop. A PowerPoint version of the slides (for editing) is available on request.
24 | 
25 | The workshop is (mostly) language-agnostic, meaning that the principles are about general coding. Examples and exercises will be in Julia and Python.
26 | It is structured as an intensive six-block course, meant to be run with tutors overseeing the exercise parts and helping the students. Students and their tutors are advised to go through the blocks one by one, spending as much time as necessary until the students have understood the block and are able to solve all exercises (especially the application to their own code, see below). The expected time for this is about a day or two per block.
27 | 
28 | This workshop was developed over 3 years by [George Datseris](https://github.com/Datseris/), combining textbooks, other workshops, online tutorials from field experts, blog posts, personal experience developing and documenting 10+ software packages, and research on how to make reproducible science.
29 | 
30 | [Lukas Kluft](https://github.com/lkluft/) helped the workshop substantially by providing Python examples, translating Julia code to Python, reviewing the slides, and being a tutor during a live version.
31 | 
32 | ## Contents
33 | 
34 | The workshop is divided into the following six blocks:
35 | 
36 | - **Version control**: retraceable and safe code history using git
37 | - **Clear code**: write code that is easy to understand and reason about
38 | - **Software developing paradigms**: write your code like a software developer
39 | - **Collaboration & publishing code**: modern team-based code development on GitHub
40 | - **Documenting software**: documentation that conveys information efficiently and intuitively
41 | - **Scientific project reproducibility**: publish reproducible papers
42 | 
43 | ## Bring your own code!
44 | 
45 | The exercise sessions have two components. In the first, simple but illustrative exercises are given to the participants to practice each topic. Participants are expected to solve the exercises live during the workshop! The second component requires the participants to apply this new knowledge to their very own code base. Therefore, please bring along all code you have used in your latest published paper. If you haven't published yet, no worries, bring along all the code you have at the moment for your science project. Decide in advance on 2-3 figures of your paper/project, which will be the central focus of the exercises. The exercise plan will transform your code from random scripts to a self-contained code base that is understandable, extendable, continuously tested, documented, and hosted on open and accessible platforms.
46 | 
47 | _Note: for this plan to have meaningful impact, you should bring a code base where you had to write a substantial amount of source code._
48 | 
49 | ## YouTube recording
50 | 
51 | A video recording of the live version of this workshop, performed at the Max Planck Institute for Meteorology, is available on YouTube: https://youtu.be/x3swaMSCcYk .
52 | 
53 | ## Citing
54 | 
55 | You can cite this material using the DOI 10.5281/zenodo.7789872, or the following BibTeX entry:
56 | 
57 | ```
58 | @software{goodscientificcode,
59 |   author       = {George Datseris},
60 |   title        = {{Good scientific code workshop}},
61 |   month        = mar,
62 |   year         = 2023,
63 |   publisher    = {Zenodo},
64 |   version      = {v1.0},
65 |   doi          = {10.5281/zenodo.7789871},
66 |   url          = {https://doi.org/10.5281/zenodo.7789871}
67 | }
68 | ```
69 | 
70 | 


--------------------------------------------------------------------------------
/block0_intro/block0_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block0_intro/block0_slides.pdf


--------------------------------------------------------------------------------
/block1_git/block1_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block1_git/block1_slides.pdf


--------------------------------------------------------------------------------
/block2_clearcode/badnames.jl:
--------------------------------------------------------------------------------
 1 | # # Case 1
 2 | function traj(ds, t::Int, u = ds.u0; kwargs...)
 3 |     i = integrator(ds, u)
 4 |     trajdiscr(i, t; kwargs...)
 5 | end
 6 | 
 7 | function trajdiscr(i, t; dt::Int = 1, ttr::Int = 0)
 8 |     t0 = current_time(i)
 9 |     tv = (t0+ttr):dt:(t0+total_time+ttr)
10 |     ttr ≠ 0 && step!(i, ttr)
11 |     data = Vector{typeof((get_state(i))}(undef, L)
12 |     data = [get_state(i)]
13 |     for _ in 2:length(timevec)
14 |         step!(i, dt)
15 |         push!(data, get_state(i))
16 |     end
17 |     return timevec, data
18 | end
19 | 
20 | # Case 2
# Evaluates `e_eq_water_mk` and `e_eq_ice_mk` element-wise on `t` and combines them:
# entries flagged `water` take the water value, entries flagged `ice` take the ice
# value, and everything in between gets a quadratic blend of the two.
#
# NOTE: the terse names are kept on purpose (renaming them is the exercise).
function sat_pres(t)
    e_eq_w_t = e_eq_water_mk.(t)
    e_eq_i_t = e_eq_ice_mk.(t)
    water = t .> constants.Ttr
    ice = t .< (constants.Ttr - 23.0)

    e_eq = @. (
        e_eq_i_t
        + (e_eq_w_t - e_eq_i_t)
        * ((t - constants.Ttr + 23) / 23) ^ 2
    )
    # The `ice` entries must be read with the `ice` mask as well; reading them with
    # the `water` mask copies the wrong values and fails whenever the two masks
    # select a different number of elements.
    e_eq[ice] = e_eq_i_t[ice]
    e_eq[water] = e_eq_w_t[water]
    return e_eq
end


--------------------------------------------------------------------------------
/block2_clearcode/badnames_improved.jl:
--------------------------------------------------------------------------------
 1 | # # Case 1
 2 | function trajectory(ds::DiscreteDynamicalSystem, total_time::Int, state = ds.u0; kwargs...)
 3 |     integ = integrator(ds, state)
 4 |     trajectory_discrete(integ, total_time; kwargs...)
 5 | end
 6 | 
 7 | # Notice how we can use `integrator` inside the function!
 8 | # It is a local name, and we don't use the existing `DynamicalSystems.integrator`
 9 | # function, so it is valid to use. There is no name conflict.
10 | # However, generally speaking, I would not advise using established
11 | # library names (like `integrator` here) as variables, as it could lead to
12 | # confusion. In actual package code I would use `integ` instead of `integrator`.
function trajectory_discrete(integrator, total_time;
        time_step::Int = 1, transient_time::Int = 0
    )
    t0 = current_time(integrator)
    timevec = (t0+transient_time):time_step:(t0+total_time+transient_time)

    # skip the transient (if any) before anything is recorded
    if transient_time != 0
        step!(integrator, transient_time)
    end

    # the first state is stored as-is; every further entry of `timevec`
    # costs one `step!` of size `time_step`
    traj = [get_state(integrator)]
    for _ in 1:(length(timevec) - 1)
        step!(integrator, time_step)
        push!(traj, get_state(integrator))
    end
    return timevec, traj
end
26 | 
27 | 
28 | 
29 | # Case 2
# Combines the element-wise water and ice equilibrium vapor pressures of
# `temperatures` into one array: above the triple point the water value is used,
# more than 23 below it the ice value, and in between a quadratic blend of both.
function saturation_pressure(temperatures)
    triple_point = constants.triple_point_h2o
    is_only_water = temperatures .> triple_point
    is_only_ice = temperatures .< (triple_point - 23.0)

    satur_press_water = equilibrium_vapor_pressure_water.(temperatures)
    satur_press_ice = equilibrium_vapor_pressure_ice.(temperatures)

    # weight of the water branch: 0 at `triple_point - 23`, 1 at `triple_point`
    water_weight = @. ((temperatures - triple_point + 23) / 23) ^ 2
    satur_press = @. satur_press_ice + (satur_press_water - satur_press_ice) * water_weight

    # outside the blending interval the pure branches take over
    satur_press[is_only_ice] = satur_press_ice[is_only_ice]
    satur_press[is_only_water] = satur_press_water[is_only_water]
    return satur_press
end


--------------------------------------------------------------------------------
/block2_clearcode/block2_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block2_clearcode/block2_slides.pdf


--------------------------------------------------------------------------------
/block2_clearcode/eratosthenis_sieve.jl:
--------------------------------------------------------------------------------
 1 | # This function generates prime numbers up to a user specified maximum `N`.
 2 | # The algorithm used is the Sieve of Eratosthenes.
 3 | # It is quite simple. Given an array of integers from 1 to `N`, cross out all multiples
 4 | # of 2. Find the next uncrossed integer, and cross out all of its multiples.
 5 | # Repeat until you have passed the square root of `N`.
 6 | # The uncrossed numbers that remain are all the primes less than or equal to `N`.
 7 | 
 8 | # quick notice: this is a verbatim port of a Java code to Julia,
 9 | # and as such it is unreasonably low level.
10 | 
# Returns a `Vector{Int}` with all primes ≤ `N`, or an empty `Int` vector if `N < 2`.
# This version is intentionally kept as low-level as the code it was ported from;
# it is the starting point that the later iterations of the exercise clean up.
function eratosthenis_sieve(N)
    if N >= 2 # the only valid case
        # Declarations:
        f = Bool[] # f[n] == true means "n has not been crossed out (yet)"
        i = 0 # never read: each `for i in ...` below introduces its own `i`
        # initialize array to true
        for i in 1:N
            push!(f, true)
        end

        # get rid of known non-primes
        f[1] = false

        # sieve
        j = 0 # never read inside the sieve: the inner `for j in ...` has its own `j`
        for i in 2:round(Int, sqrt(N)+1)
            if f[i] # if i is uncrossed, cross its multiples
                for j in 2*i:i:N
                    f[j] = false # multiple is not a prime
                end
            end
        end

        # how many primes are there
        count = 0
        for i in 1:N
            if f[i]
                count += 1
            end
        end

        primes = zeros(Int, count)

        # move the primes into the result
        j = 1 # next free slot in `primes`
        for i in 1:N
            if f[i] # if prime
                primes[j] = i
                j += 1
            end
        end
        return primes # return the primes
    else # if N < 2
        return Int[] # return empty array if bad input
    end
end
57 | 
58 | eratosthenis_sieve(10)


--------------------------------------------------------------------------------
/block2_clearcode/eratosthenis_sieve.py:
--------------------------------------------------------------------------------
 1 | # This function generates prime numbers up to a user specified maximum `N`.
 2 | # The algorithm used is the Sieve of Eratosthenes.
 3 | # It is quite simple. Given an array of integers from 1 to `N`, cross out all multiples
 4 | # of 2. Find the next uncrossed integer, and cross out all of its multiples.
 5 | # Repeat until you have passed the square root of `N`.
 6 | # The uncrossed numbers that remain are all the primes less than `N`.
 7 | 
 8 | # quick notice: this is a verbatic port of a Java code to Python,
 9 | # and as such it is unreasonably low level.
10 | import numpy as np
11 | 
12 | 
13 | def eratosthenis_sieve(N):
14 |     if N >= 2:  # the only valid case
15 |         # Declerations:
16 |         # initialize array to true
17 |         f = np.ones(N, dtype=bool)
18 | 
19 |         # get rid of known non-primes
20 |         f[:2] = False
21 | 
22 |         # sieve
23 |         for i in range(int(np.round(np.sqrt(N) + 1))):
24 |             if f[i]:  # if i is uncrossed, cross its multiples
25 |                 for j in range(i**2, N, i):
26 |                     f[j] = False  # multiple is not a prime
27 | 
28 |         # how many primes are there
29 |         count = 0
30 |         for i in range(N):
31 |             if f[i]:
32 |                 count += 1
33 | 
34 |         primes = np.zeros(count)
35 | 
36 |         # move the primes into the result
37 |         j = 0
38 |         for i in range(N):
39 |             if f[i]:  # if prime
40 |                 primes[j] = i
41 |                 j += 1
42 |         return primes  # return the primes
43 |     else:  # if N < 2
44 |         return np.empty(0)  # return null array if bad imput
45 | 
46 | 
def test_primes():
    """Check `eratosthenis_sieve` against a few well-known facts about small primes."""
    primes_until_10 = eratosthenis_sieve(10)
    assert 0 not in primes_until_10, "0 is not a prime."
    assert 1 not in primes_until_10, "1 is not a prime."
    assert len(primes_until_10) == 4, "There are four primes until 10."

    primes_until_100 = eratosthenis_sieve(100)
    assert len(primes_until_100) == 25, "There are 25 primes until 100."
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     print(eratosthenis_sieve(10))
56 | 


--------------------------------------------------------------------------------
/block2_clearcode/eratosthenis_sieve_julian.jl:
--------------------------------------------------------------------------------
 1 | # For the first iteration, we will simply transform this function from Java to Julia.
 2 | # This shows so nicely why using a new language will skyrocket your productivity.
 3 | 
 4 | # Differences: arrays can be immediately initialized to a value, so the `for`
 5 | # loop that filled `f` is no longer needed. We use `f = trues(N)`.
 6 | # Variables like `i` and `j` do not need to be declared. Starting a loop already
 7 | # declares them.
 8 | # There is no reason to write a `for` loop to count the true elements in an array;
 9 | # that's literally what the `count` function does.
# Returns a `Vector{Int}` with all primes ≤ `N`, or an empty `Int` vector if `N < 2`.
function eratosthenis_sieve(N)
    if N >= 2 # the only valid case
        # Declarations:
        f = trues(N) # f[n] == true means "n has not been crossed out (yet)"
        # get rid of known non-primes
        f[1] = false

        # sieve
        for i in 2:round(Int, sqrt(N)+1)
            if f[i] # if i is uncrossed, cross its multiples
                for j in 2*i:i:N
                    f[j] = false # multiple is not a prime
                end
            end
        end

        # how many primes are there
        # (`Base.` is required: the local variable `count` hides the function name here)
        count = Base.count(f)

        primes = zeros(Int, count)

        # move the primes into the result
        j = 1 # next free slot in `primes`
        for i in 1:N
            if f[i] # if prime
                primes[j] = i
                j += 1
            end
        end
        return primes # return the primes
    else # if N < 2
        return Int[] # return empty array if bad input
    end
end
44 | 
45 | eratosthenis_sieve(10)


--------------------------------------------------------------------------------
/block2_clearcode/eratosthenis_sieve_pythonic.py:
--------------------------------------------------------------------------------
 1 | # This function generates prime numbers up to a user specified maximum `N`.
 2 | # The algorithm used is the Sieve of Eratosthenes.
 3 | # It is quite simple. Given an array of integers from 1 to `N`, cross out all multiples
 4 | # of 2. Find the next uncrossed integer, and cross out all of its multiples.
 5 | # Repeat until you have passed the square root of `N`.
 6 | # The uncrossed numbers that remain are all the primes less than or equal to `N`.
 7 | 
 8 | 
 9 | def eratosthenis_sieve(N):
10 |     prime = [i > 1 for i in range(N + 1)]  # (initialize 0 and 1 to False)
11 |     p = 0
12 |     while p * p <= N:
13 |         # If prime[p] is not changed, then it is a prime
14 |         if prime[p]:
15 |             # Updating all multiples of p
16 |             for i in range(p * p, N + 1, p):
17 |                 prime[i] = False
18 |         p += 1
19 | 
20 |     return [i for i, p in enumerate(prime) if p]
21 | 
22 | 
def test_primes():
    """Check `eratosthenis_sieve` against a few well-known facts about small primes."""
    primes_until_10 = eratosthenis_sieve(10)
    assert 0 not in primes_until_10, "0 is not a prime."
    assert 1 not in primes_until_10, "1 is not a prime."
    assert len(primes_until_10) == 4, "There are four primes until 10."

    primes_until_100 = eratosthenis_sieve(100)
    assert len(primes_until_100) == 25, "There are 25 primes until 100."
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     print(eratosthenis_sieve(10))
32 | 


--------------------------------------------------------------------------------
/block2_clearcode/eratosthenis_sieve_solved.jl:
--------------------------------------------------------------------------------
 1 | # The second iteration is the "good code version". Differences:
 2 | # * better variable names
 3 | # * Initial `if` clause is removed in favor of an early return statement
 4 | # * the crossing of integers is made its own function
 5 | # * the final collection of primes uses an existing function from base Julia
 6 | #   that simply finds all true elements. Because in Julia array indexing starts from 1
 7 | #   and because our `isprime` also contains an entry for 1, the indices of `isprime`
 8 | #   that are true are also the prime numbers themselves.
 9 | 
10 | # God I love 1-based indexing.
11 | 
"""
    eratosthenis_sieve(N::Integer) → primes

Return a vector of all primes that are ≤ `N` using the "Sieve of Eratosthenes".
An empty `Int` vector is returned if `N < 2`.
"""
function eratosthenis_sieve(N::Integer)
    N < 2 && return Int[]
    isprime = trues(N)   # number `n` is prime if `isprime[n] == true`
    isprime[1] = false   # 1 is not a prime number by definition
    cross_prime_multiples!(isprime)
    # indexing starts at 1, so the indices of the `true` entries are the primes
    return findall(isprime)
end
24 | 
"""
    cross_prime_multiples!(isprime::AbstractVector{Bool})

For all primes in `isprime` (elements that are `true`), set all their multiples to `false`.
Assumes `isprime` starts counting from 1. Vectors with fewer than two elements are
left untouched. Returns `nothing`; `isprime` is modified in place.
"""
function cross_prime_multiples!(isprime::AbstractVector{Bool})
    N = length(isprime)
    # Every composite number ≤ `N` has a divisor ≤ √N, so it is enough to visit the
    # candidates up to `isqrt(N)`. The integer square root also keeps the loop within
    # bounds for a length-1 vector, where `round(Int, sqrt(N)+1)` would index past the end.
    for i in 2:isqrt(N)
        isprime[i] || continue
        for j in 2i:i:N
            isprime[j] = false
        end
    end
    return nothing
end
41 | 
42 | eratosthenis_sieve(100)


--------------------------------------------------------------------------------
/block2_clearcode/snippets.jl:
--------------------------------------------------------------------------------
  1 | # name comparison
  2 | 
# Collects every element `x` of `b` whose `status` field equals 4 and returns them
# as an untyped vector. Neither the names nor the bare literal `4` reveal what is
# being selected; the loop has the same shape as `flaggedcells` below.
function get_them(b)
    list = []
    for x in b
        if x.status == 4
            push!(list, x)
        end
    end
    return list
end
 12 | 
# Returns (as an untyped vector) the cells of `gameboard` whose `status` equals
# `FLAGGED`, a constant that is not defined in this snippet. The loop has the same
# shape as `get_them` above; only the names differ.
function flaggedcells(gameboard)
    flagged = []
    for cell in gameboard
        if cell.status == FLAGGED
            push!(flagged, cell)
        end
    end
    return flagged
end
 22 | 
 23 | # intention revealing names
 24 | a, b, c = 1, 2, 3
 25 | # versus
 26 | speed = 3
 27 | days_since_creation = 1
 28 | 
 29 | # magical constant
 30 | function add_nucleotide_matrices(m1, m2)
 31 |     L = size(m1)[1]
 32 |     out = zeros(L, 4)
 33 |     for i in 1:L
 34 |         for j in 1:4
 35 |             out[i, j] = m1[i, j] + m2[i, j]
 36 |         end
 37 |     end
 38 |     return out
 39 | end
 40 | 
 41 | # Verbosity
 42 | for left_index in 1:10
 43 |     for right_index in 1:10
 44 |         m[left_index, right_index] = rand()
 45 |     end
 46 | end
 47 | # versus
 48 | for i in 1:10
 49 |     for j in 1:10
 50 |         m[i, j] = rand()
 51 |     end
 52 | end
 53 | 
 54 | # unicode
 55 | cross(psi1, psi2)
 56 | ψ₁ ⊗ ψ₂
 57 | 
 58 | ∇ρ = gradient(ρ)
 59 | ⟨ε★⟩ = Γ * mean(ε★) / λ
 60 | 
 61 | F↑ = upwards_solar_radiation(data)
 62 | F↓ = downwards_solar_radiation(data)
 63 | α = F↑/F↓ # surface albedo
 64 | 
 65 | const ℜ = Real
 66 | 
 67 | 
 68 | 
 69 | # Functional programming
# Monolithic variant: repository lookup, download and validation all happen inside
# one function body, separated only by comments. Raises an error if no repository
# lists `id` or if the downloaded sequence contains one of the characters "BXZJOU".
# NOTE(review): `download_sq` is called here as `(id, repo)`, while the shorter
# variant further down calls it as `(repo, id)` -- confirm which order is intended.
function load_sequence(id)
    # first find a suitable download location
    download_repo = nothing
    for repo in ALL_REPOS
        if id in repo.index
            download_repo = repo
            break
        end
    end
    isnothing(download_repo) && error("No download")

    # actually download the sequence
    validate(download_repo.connection, id)
    protein_sq = download_sq(id, download_repo)

    # Check sequence
    for aacid in "BXZJOU"
        if aacid in protein_sq
            error("Invalid sequence")
        end
    end

    return protein_sq
end
 94 | 
 95 | function load_sequence(id)
 96 |     repo = find_repo(id)
 97 |     protein_sq = download_sq(repo, id)
 98 |     validate(protein_sq)
 99 |     return protein_sq
100 | end
101 | 
# Downloads the sequence for `id`, replaces every element by its complement
# (anything that is not A, T, C or G becomes 'N') and returns the result of
# `remove_flaning_n` applied to that complement.
function load_sequence(id)
    seq = download_seq(id)

    # calculate the complement; exchange C-G, A-T
    not_recognized = 'N'
    complement_map = Dict('A' => 'T', 'T' => 'A', 'C' => 'G', 'G' => 'C')
    complement = copy(seq)
    for (position, nucleotide) in enumerate(seq)
        complement[position] = get(complement_map, nucleotide, not_recognized)
    end

    return remove_flaning_n(complement)
end
120 | 
121 | # unwanted side effect
# Returns `true` if the encoded `password` matches the one stored for `username`,
# `false` otherwise. On a match it additionally calls `initialize_user_session()`
# before returning, which is the side effect this example is about.
function check_authentification(username, password)
    user = get_user(username)
    encoded = encode(password)
    user.encoded_pass == encoded || return false
    initialize_user_session()
    return true
end
132 | 
133 | 
134 | # %% Keyword propagation
# Computes `spatial_cor(X, Y)`, chooses the color from its maximum and forwards the
# result plus all remaining keyword arguments to `plot_field`.
function plot_field_cor(X, Y; kwargs...)
    z = spatial_cor(X, Y)
    color = if maximum(z) > 10.0
        "C0"
    else
        "C1"
    end
    return plot_field(z; color, kwargs...)
end
140 | 
# Placeholder for the plotting routine that `plot_field_cor` forwards its keyword
# arguments to; the body is left unimplemented in this snippet. Accepts the
# keywords `color` and `marker` with the defaults shown in the signature.
function plot_field(X; color = "C0", marker = "o")
    # do the actual plotting
end
144 | 
145 | # main function
# Top-level entry point: reads as a short list of steps (download, validate,
# return), each of which is delegated to its own function below.
function load_sequence(id)
    seq = download_seq(id)
    validate(seq)
    return seq
end

# Placeholder: body left unimplemented in this snippet.
function download_seq(id)
    # implementation
end

# Placeholder: body left unimplemented in this snippet.
function validate(seq)
    # implementation
end
159 | 
160 | 
161 | 
162 | # Bad comments:
163 | # validate protein sequence
164 | valid = validate_sequence(protein_seq)
165 | 
166 | # return true if all amino acids are valid
167 | !valid && error("Invalid")
168 | 
169 | toks = split(line)
170 | # toks[5] contains the raw p-value, toks[6] the test number
171 | adj_pval = calculate_adj_pval(float(toks[5]), float(toks[6]))
172 | 
173 | # versus:
174 | raw_pval = toks[5]; testno = toks[6]
175 | adj_pval = calculate_adj_pval(raw_pval, testno)
176 | 
177 | # Good comments
178 | # this test requires ~1h on local machine; can't run on CI
179 | @test crazy = longtest(val)
180 | 
181 | # matches genomic regions in the format <id>:<start>-<end>
182 | genomic_regex = r"([^:]+):(\d+)-(\d+)$"
183 | 
184 | # TODO: Generalize to higher dims
185 | for i in 1:5
186 |     stuff...
187 | end
188 | 
189 | # Perform eqs. (19) and (20) from Datseris et al., 2019
190 | x = 5y^2 + 2
191 | 
192 | B = 5 # magnetic field (in Tesla)
193 | V = 2.5 # potential (in eV)
194 | 
195 | # good value for peak detection
196 | const PEAK = 7
197 | 
198 | 
# spacing
b*b-4*a*c         # no
b * b - 4 * a * c # no
b*b - 4*a*c       # yes!

a = b && (x .< y)/(z^2) # yes
a=b&&(x.<y)/(z ^ 2)     # no!

f(x,y;z=3)     # no!
f(x, y; z = 3) # yes!
f(x, y; z=3)   # debatable!
210 | 
211 | # Floats:
212 | # Yes:     # No:
213 | 0.1        .1
214 | 2.0        2.
215 | 3.0f0      3.f0
216 | 
217 | 
218 | # unicode
219 | ⊗ = crossproduct
220 | ψ₁ = gaussianstate(x, y)
221 | ψ₂ = gaussianstate(x, y+0.1)
222 | prod = ψ₁ ⊗ ψ₂
223 | 
224 | 
225 | 
226 | # Metrics implementation
module Metrics

export Metric, Chebyshev, distance, pairwise

abstract type PreMetric end
abstract type SemiMetric <: PreMetric end
abstract type Metric <: SemiMetric end

"""
    distance(x, y, m::Metric)
Calculate the distance between `x, y` according to the
metric space defined by `m`, which can be any
subtype of `Metric`.

New metrics extend this function with a method for their own type; the
fallback below throws an error for metrics that have not done so.
"""
function distance(x, y, metric::Metric)
    error("Distance not implemented for $(typeof(metric))")
end

"""
    Chebyshev <: Metric
The Chebyshev (maximum) metric: the largest absolute difference between
corresponding elements of `x` and `y`.
"""
struct Chebyshev <: Metric end
function distance(x, y, ::Chebyshev)
    d = zero(eltype(x))
    # `eachindex(x, y)` also verifies that both inputs share the same indices
    for i in eachindex(x, y)
        d = max(d, abs(x[i] - y[i]))
    end
    return d
end

"""
    pairwise(metric, a, b = a)
Return the matrix `r` with `r[i, j] = distance(a[i], b[j], metric)`, where
`a` and `b` are collections of points.
"""
function pairwise(metric::PreMetric, a, b = a)
    r = zeros(length(a), length(b))
    for (j, bj) in enumerate(b)
        for (i, ai) in enumerate(a)
            # metrics are plain types, not callables: go through `distance`
            r[i, j] = distance(ai, bj, metric)
        end
    end
    return r
end

end # module Metrics
266 | 
267 | 
268 | # Extension
# (write `using .Metrics` instead if the module is defined in the current
# file/session rather than installed as a package)
using Metrics
struct RandomMetric <: Metric end
# Extending `distance` is all a new metric has to do to plug into the
# rest of the `Metrics` functionality.
function Metrics.distance(x, y, ::RandomMetric)
    return rand()
end

# now this works (the metric is the first argument of `pairwise`):
x = y = [rand(100) for _ in 1:10]
pairwise(RandomMetric(), x, y)
278 | 
279 | 
280 | 
281 | 
282 | 
283 | # Testing code
284 | using Test # module from Standard Library
285 | 
286 | @testset "MyPackageTests" begin
287 |     @testset "arithmetic" begin include("math_tests.jl") end
288 |     @testset "trigonometric" begin include("trig_tests.jl") end
289 | end
290 | 
291 | # e.g., "math_tests.jl" has:
292 | @test 1 + 1 == 2
293 | @test 1 - 1 == 0
294 | 


--------------------------------------------------------------------------------
/block3_softwaredev/block3_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block3_softwaredev/block3_slides.pdf


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_functions.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import matplotlib.pyplot as plt
 4 | import numpy as np
 5 | import requests
 6 | 
 7 | 
 8 | def download_data(url):
 9 |     response = requests.get(url)
10 | 
11 |     temp = Path("temp")
12 |     temp.write_bytes(response.content)
13 |     timeseries = np.genfromtxt("temp")
14 |     temp.unlink()
15 | 
16 |     return timeseries
17 | 
18 | 
def moving_average(t, x, w=12):
    """Compute the moving average of `x` over windows of `w` samples.

    Parameters:
        t (ndarray): Time axis belonging to `x`.
        x (ndarray): Input data.
        w (int): Window size; must satisfy ``0 < w < len(x)``.

    Returns:
        tuple: ``(t[:-w], m)`` where ``m[i]`` is the mean of ``x[i:i + w]``.

    Raises:
        ValueError: If `w` is not in the range ``0 < w < len(x)``.
    """
    n = len(x)
    if not 0 < w < n:
        raise ValueError(f"window size must satisfy 0 < w < len(x), got w={w}")

    m = np.zeros(n - w)
    m[0] = sum(x[:w]) / w
    for i in range(1, n - w):
        # Slide the window by one sample: x[i + w - 1] enters, x[i - 1] leaves.
        m[i] = m[i - 1] + (x[i + w - 1] - x[i - 1]) / w

    return t[:-w], m
27 | 
28 | 
def determine_trend(y):
    """Fit a least-squares straight line to `y` sampled at x = 1, 2, ..., n.

    Parameters:
        y (ndarray): One-dimensional input data.

    Returns:
        ndarray: The fitted line evaluated at every sample position.
    """
    x = np.arange(y.size) + 1
    mx = np.mean(x)
    my = np.mean(y)
    # `bias=True` normalizes the covariance by N, matching `np.var` (ddof=0).
    # It must not depend on the data: a mean of 0 used to flip the
    # normalization to N - 1 and thereby skew the slope.
    b = np.cov(y, x, bias=True)[0, 1] / np.var(x)
    a = my - b * mx
    trend = a + b * x

    return trend
38 | 
39 | 
def nrmse(y, z):
    """Compute the normalized root-mean-square error of `z` with respect to `y`.

    The mean squared error between `y` and `z` is normalized by the mean
    squared deviation of `y` from its own mean.

    Parameters:
        y (ndarray): Reference data.
        z (ndarray): Approximation of `y` (e.g. a fitted trend).

    Returns:
        float: ``sqrt(mean((y - z)**2) / mean((y - mean(y))**2))``.
    """
    y = np.asarray(y)
    z = np.asarray(z)
    # Squared (not absolute) deviations: this is a root-mean-*square* error.
    mse = np.mean((y - z) ** 2)
    msemean = np.mean((y - np.mean(y)) ** 2)

    return np.sqrt(mse / msemean)
47 | 
48 | 
def plot_timeseries(x, ax=None):
    """Plot a timeseries together with its moving average and linear trend.

    Parameters:
        x (ndarray): Timeseries to plot.
        ax (matplotlib axes): Axes to draw into; the current axes are used
            when omitted.
    """
    axis = plt.gca() if ax is None else ax
    time = np.arange(x.size)

    # raw data
    axis.plot(time, x, linewidth=1)

    # smoothed data
    smoothed_time, smoothed = moving_average(time, x)
    axis.plot(smoothed_time, smoothed, linewidth=2)

    # linear trend, labelled with its normalized error
    fitted = determine_trend(x)
    error = nrmse(x, fitted)
    axis.plot(time, fitted, linewidth=2, linestyle=":", label=f"nrmse={error:.3f}")
    axis.legend()
62 | 
63 | 
def main():
    """Download the example data and plot each of its columns in its own panel."""
    url = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
    columns = download_data(url).T

    _, axes = plt.subplots(nrows=2)
    for axis, series in zip(axes.flatten(), columns):
        plot_timeseries(series, ax=axis)
    plt.show()
72 | 
73 | 
74 | if __name__ == "__main__":
75 |     main()
76 | 


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_monolithic.jl:
--------------------------------------------------------------------------------
# Plot timeseries
 2 | url = raw"https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
 3 | w = 12
 4 | 
 5 | using DelimitedFiles
 6 | using Downloads
 7 | response = Downloads.request(url)
 8 | @assert response.status == 200 "URL doesn't exist!"
 9 | Downloads.download(url, "temp")
10 | X = try
11 |     readdlm("temp")
12 | catch err
13 |     throw(ArgumentError("Downloaded file isn't tabular text format!"))
14 | end
15 | rm("temp")
16 | timeseries = X[:,1], X[:,2]
17 | 
# Moving average of every timeseries over windows of `w` samples:
# `m[i]` is the mean of `x[i:i+w-1]`.
moving_averaged = Vector{Float64}[]
for x in timeseries
    n = length(x)
    m = zeros(n - w)
    m[1] = sum(x[1:w])/w
    for i in 2:n-w
        # slide the window by one sample: x[i+w-1] enters, x[i-1] leaves
        m[i] = m[i-1] + (x[i+w-1] - x[i-1])/w
    end
    push!(moving_averaged, m)
end
28 | 
29 | using Statistics: mean, covm, varm
# Least-squares linear trend of every timeseries (sampled at x = 1, 2, ..., n)
# and the normalized root-mean-square error of that trend.
trends = Vector{Float64}[]
nrmses = Float64[]
for y in timeseries
    x = 1:length(y)
    mx = mean(x)
    my = mean(y)
    b = covm(x, mx, y, my)/varm(x, mx)
    a = my - b*mx
    trend = @. a + b*x
    push!(trends, trend)
    n = length(y)
    # error of the trend, normalized by the spread of `y` around its mean
    mse = sum(abs2(trend[i] - y[i]) for i in 1:n) / n
    msemean = sum(abs2(y[i] - my) for i in 1:n) / n
    nrmse = sqrt(mse/msemean)
    push!(nrmses, nrmse)
end
47 | 
48 | fig = figure()
49 | ax = subplot(2, 1, 1)
50 | x = timeseries[1]
51 | t = 1:length(timeseries[1])
52 | plot(t, timeseries[1]; linewidth = 1)
53 | plot(t[1:end-w], moving_averaged[1]; linewidth = 2)
54 | plot(t, trends[1]; linewidth = 2, linestyle = ":", label = "nrmse=$(nrmses[1])")
55 | ylabel("quantity 1")
56 | legend()
57 | ax = subplot(2, 1, 2)
58 | x = timeseries[2]
59 | t = 1:length(timeseries[2])
60 | plot(t, timeseries[2]; linewidth = 1)
61 | plot(t[1:end-w], moving_averaged[2]; linewidth = 2)
62 | plot(t, trends[2]; linewidth = 2, linestyle = ":", label = "nrmse=$(nrmses[2])")
63 | ylabel("quantity 2")
64 | legend()
65 | 


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_monolithic.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import matplotlib.pyplot as plt
 4 | import numpy as np
 5 | import requests
 6 | 
 7 | 
 8 | url = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
 9 | w = 12
10 | 
11 | response = requests.get(url)
12 | 
13 | temp = Path("temp")
14 | temp.write_bytes(response.content)
15 | timeseries = np.genfromtxt("temp")
16 | temp.unlink()
17 | 
# Moving average of every column over windows of `w` samples:
# m[i] is the mean of x[i:i + w].
moving_averaged = []
for x in timeseries.T:
    n = len(x)
    m = np.zeros(len(x) - w)
    m[0] = sum(x[:w]) / w
    for i in range(1, n - w):
        # Slide the window by one sample: x[i + w - 1] enters, x[i - 1] leaves.
        m[i] = m[i - 1] + (x[i + w - 1] - x[i - 1]) / w
    moving_averaged.append(m)
26 | 
# Least-squares linear trend of every column and the normalized
# root-mean-square error of that trend.
trends = []
nrmses = []

for y in timeseries.T:
    x = np.arange(y.size) + 1
    mx = np.mean(x)
    my = np.mean(y)
    # bias=True normalizes by N like np.var does; it must not depend on the data.
    b = np.cov(y, x, bias=True)[0, 1] / np.var(x)
    a = my - b * mx
    trend = a + b * x
    trends.append(trend)

    n = y.size
    # Squared (not absolute) deviations: this is a root-mean-*square* error.
    mse = np.sum((trend - y) ** 2) / n
    msemean = np.sum((y - my) ** 2) / n
    nrmse = np.sqrt(mse / msemean)
    nrmses.append(nrmse)
44 | 
45 | fig, axes = plt.subplots(nrows=2)
46 | for i, x in enumerate(timeseries.T):
47 |     t = np.arange(x.size)
48 |     axes[i].plot(t, x, linewidth=1)
49 |     axes[i].plot(t[:-w], moving_averaged[i], linewidth=2)
50 |     axes[i].plot(
51 |         t, trends[i], linewidth=2, linestyle=":", label=f"nrmse={nrmses[i]:.3f}"
52 |     )
53 |     axes[i].set_ylabel(f"quantity {i + 1}")
54 |     axes[i].legend()
55 | plt.show()
56 | 


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_monolithic_script.jl:
--------------------------------------------------------------------------------
 1 | # plot timeseries
 2 | # Here is a descritpion for the script.
 3 | include("plot_timeseries_source.jl")
 4 | url = raw"https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
 5 | w = 12
 6 | 
 7 | timeseries = download_timeseries(url)
 8 | moving_averaged = moving_average.(timeseries, w)
 9 | trends = fit_trend.(timeseries)
10 | nrmses = nrmse.(trends, timeseries)
11 | 
12 | fig = figure()
13 | for i in 1:length(timeseries)
14 |     ax = subplot(2, 1, i)
15 |     x = timeseries[i]
16 |     t = 1:length(timeseries[i])
17 |     plot(t, timeseries[i]; linewidth = 1)
18 |     plot(t[1:end-w], moving_averaged[i]; linewidth = 2)
19 |     plot(t, trends[1]; linewidth = 2, linestyle = ":", label = "nrmse=$(nrmses[i])")
20 |     ylabel("quantity $i")
21 |     legend()
22 | end
23 | 


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_monolithic_source.jl:
--------------------------------------------------------------------------------
 1 | using DelimitedFiles
 2 | using Downloads
 3 | function download_timeseries(url)
 4 |     response = Downloads.request(url)
 5 |     @assert response.status == 200 "URL doesn't exist!"
 6 |     Downloads.download(url, "temp")
 7 |     X = try
 8 |         readdlm("temp")
 9 |     catch err
10 |         throw(ArgumentError("Downloaded file isn't tabular text format!"))
11 |     end
12 |     rm("temp")
13 |     timeseries = eachcol(x)
14 | end
15 | 
"""
    moving_average(x, w)

Return the moving average `m` of `x` over windows of `w` samples, where
`m[i]` is the mean of `x[i:i+w-1]`. The output has `length(x) - w` entries.
Requires `0 < w < length(x)`.
"""
function moving_average(x, w)
    n = length(x)
    0 < w < n || throw(ArgumentError("window must satisfy 0 < w < length(x), got w = $w"))
    m = zeros(n - w)
    m[1] = sum(x[1:w])/w
    for i in 2:n-w
        # slide the window by one sample: x[i+w-1] enters, x[i-1] leaves
        m[i] = m[i-1] + (x[i+w-1] - x[i-1])/w
    end
    return m
end
25 | 
26 | using Statistics: mean, covm, varm
"""
    fit_trend(y, x = 1:length(y)) -> trend, nrmse

Fit a least-squares straight line to the points `(x[i], y[i])` and return the
line evaluated at `x` together with its normalized root-mean-square error.
"""
function fit_trend(y, x = 1:length(y))
    length(x) == length(y) ||
        throw(DimensionMismatch("`x` and `y` must have the same length"))
    mx = mean(x)
    my = mean(y)
    b = covm(x, mx, y, my)/varm(x, mx)
    a = my - b*mx
    trend = @. a + b*x
    return trend, nrmse(y, trend, my)
end

"""
    nrmse(y, z, my = mean(y))

Return the root-mean-square error between `y` and its approximation `z`,
normalized by the root-mean-square deviation of `y` from its mean `my`.
"""
function nrmse(y, z, my = mean(y))
    n = length(y)
    mse = sum(abs2(z[i] - y[i]) for i in 1:n) / n
    msemean = sum(abs2(y[i] - my) for i in 1:n) / n
    return sqrt(mse/msemean)
end


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_numpy.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import requests
 4 | from io import StringIO
 5 | 
 6 | url = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
 7 | w = 12
 8 | 
 9 | response = requests.get(url)
10 | timeseries = np.genfromtxt(StringIO(response.text))
11 | 
12 | fig, axes = plt.subplots(nrows=2)
13 | for ts, ax in zip(timeseries.T, axes):
14 |     x = np.arange(ts.shape[0])
15 |     moving_average = np.convolve(ts, np.ones(w) / w, mode="same")
16 |     popt = np.polyfit(x, ts, deg=1)
17 |     rmse = np.sqrt(np.mean((np.polyval(popt, x) - ts) ** 2))
18 | 
19 |     ax.plot(x, ts)
20 |     ax.plot(x, moving_average)
21 |     ax.plot(x, np.polyval(popt, x), ls=":", label=f"nrmse={rmse:.3f}")
22 |     ax.legend()
23 | plt.show()
24 | 


--------------------------------------------------------------------------------
/block3_softwaredev/plot_timeseries_xarray.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import pandas as pd
 4 | import xarray as xr
 5 | 
 6 | 
 7 | url = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
 8 | w = 12
 9 | 
ds = pd.read_csv(url, delimiter="\t", names=["var1", "var2"]).to_xarray()

fig, axes = plt.subplots(nrows=2)
for var, ax in zip(ds, axes):
    ds[var].plot(ax=ax)
    ds[var].rolling(index=w).mean().plot(ax=ax)
    p = ds[var].polyfit(deg=1, dim="index")
    # Evaluate the fitted line once and reuse it for the error and the plot.
    trend = xr.polyval(ds.index, p.polyfit_coefficients)
    # Normalize the mean squared error by the variance of the data so that the
    # plotted value really is the *normalized* RMSE announced in the label.
    nrmse = float(np.sqrt(((trend - ds[var]) ** 2).mean() / ds[var].var()))
    trend.plot(ls=":", label=f"nrmse={nrmse:.3f}", ax=ax)
    ax.legend()
plt.show()
25 | 


--------------------------------------------------------------------------------
/block3_softwaredev/running_mean.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | 
 5 | def running_mean(x, win_size):
 6 |     return np.convolve(x, np.ones(w) / w, mode="valid")
 7 | 
 8 | 
 9 | def running_mean(x, win_size, win_type=np.ones):
10 |     w = win_type(win_size)
11 |     return np.convolve(x, w / w.sum(), mode="valid")
12 | 
13 | 
def running_mean(x, win_size, win_type=np.ones, aggregation=None):
    """Compute a weighted running aggregate (by default the mean) of `x`.

    `win_type` is called with `win_size` and must return the window weights.
    If `aggregation` is given, it is applied along the window axis of the
    weighted windows and the result is divided by the mean weight.
    """
    weights = win_type(win_size)

    if aggregation is not None:
        # Generic case: every window is materialized and aggregated explicitly,
        # which is slower. Hence the separate API (i.e. keyword) for it.
        windows = np.lib.stride_tricks.sliding_window_view(x, win_size)
        return aggregation(windows * weights, axis=1) / weights.mean()

    # Default case: the running mean is a single convolution with the
    # normalized weights.
    return np.convolve(x, weights / weights.sum(), mode="valid")
27 | 
28 | 
def main():
    """Plot random data together with its running mean for several window types."""
    np.random.seed(1)
    x = np.random.randn(256) + 2

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x, c="grey")
    for win_type in (np.ones, np.bartlett, np.blackman, np.hamming):
        smoothed = running_mean(x, win_size=16, win_type=win_type)
        ax.plot(smoothed, linewidth=2, label=win_type.__name__.capitalize())
    ax.legend()
    # zoom in on the central 90 % of the raw data
    ax.set_ylim(np.percentile(x, [5, 95]))

    plt.show()
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     main()
56 | 


--------------------------------------------------------------------------------
/block3_softwaredev/temporal_means_generalized.jl:
--------------------------------------------------------------------------------
 1 | #=
This version shows code that is generalizable and is practically a simplification
 3 | of the code surrounding `monthlyagg` and co. in ClimateBase.jl.
 4 | =#
 5 | using Dates
 6 | using Statistics
 7 | 
 8 | # This is the "main" function of the codebase:
 9 | # it performs a temporal aggregation over provided "window"
10 | # using provided aggregator. The "window" function can actually be anything
11 | # that takes in a date and returns a value: consecutive entries with the same
12 | # value belong to the same temporal window.
13 | # The function then returns the coarse (middle point)
14 | # time vector of the windows and the corresponding aggregated timeseries
15 | function temporal_aggregation(t::AbstractVector{<:TimeType}, x::AbstractVector;
16 |         aggregator = mean, window = Dates.month
17 |     )
18 |     tranges = temporal_ranges(t, window)
19 |     y = [aggregator(view(x, r)) for r in tranges]
20 |     coarse_t = [middle_date(t[r[1]], t[r[end]]) for r in tranges]
21 |     return coarse_t, y
22 | end
23 | 
"""
    temporal_ranges(t, window = Dates.month)

Split the sorted dates `t` into index ranges, each range covering consecutive
entries for which `window` returns the same value. An empty `t` yields no
ranges. Throws an error if `t` is not sorted.
"""
function temporal_ranges(t::AbstractArray{<:TimeType}, window = Dates.month)
    issorted(t) || error("Sorted time vector required.") # checker step!
    ranges = Vector{UnitRange{Int}}() # initialize an empty vector
    L = length(t)
    L == 0 && return ranges # nothing to split; avoids indexing `t[1]`
    i, previous = 1, window(t[1])
    for j in 2:L
        current = window(t[j])
        previous == current && continue
        # the window value changed at `j`: close the range that started at `i`
        push!(ranges, i:(j-1))
        i, previous = j, current
    end
    push!(ranges, i:L) # final range not included in for loop
    return ranges
end
39 | 
# Return the `DateTime` halfway between `t0` and `t1`; used to create the new
# time vector of the aggregated timeseries. Integer division keeps the result
# well-defined when the two dates are an odd number of milliseconds apart.
middle_date(t0, t1) = DateTime(t0) + (DateTime(t1) - DateTime(t0)) ÷ 2
42 | 
43 | # the previous month mean functionality can still exist as a convenience function:
44 | monthlymeans(t, x) = temporal_aggregation(t, x; aggregator = mean, window = Dates.month)
45 | 
46 | # Test from monthly means exercise
47 | t = Date(2015, 1, 1):Day(1):Date(2020, 12, 31)
48 | x = float.(month.(t))
49 | m, y = monthlymeans(t, x)
50 | 
51 | # So how do we do it with summer and winter? Easy; just make a window function
52 | # that assigns the same value to all summer months, and another value to all winter months!
53 | summer(x) = month(x) ∈ (3,4,5,6,7,8)
54 | m, y = temporal_aggregation(t, x; window = summer)
55 | 


--------------------------------------------------------------------------------
/block3_softwaredev/temporal_means_generalized_tests.jl:
--------------------------------------------------------------------------------
 1 | include("temporal_means_generalized.jl")
 2 | using Dates
 3 | using Test
 4 | using Statistics
 5 | 
 6 | # This file tests only the `monthlymeans, monthlyagg` functions.
 7 | # Of course, many more tests should be written for aggregating
 8 | # over different time spans!
 9 | monthlyagg(x, t; aggregator = mean) = temporal_aggregation(t, x; aggregator, window = Dates.month)
10 | 
11 | @testset "monthly means" begin
12 |     @testset "API" begin
13 |         t2 = [Date(2003, 3, 15), Date(2001, 11, 2)]
14 |         @test_throws AssertionError monthlyagg(t2, rand(2))
15 |         t = [Date(2003, 3, 15), Date(2004, 11, 2)]
16 |         w, y = monthlymeans(t, x)
17 |         w2, y2 = monthlyagg(t, x)
18 |         @test w == w2
19 |         @test y == y2
20 |     end
21 |     @testset "standard ranges" begin
22 |         # hourly
23 |         t = DateTime(2000, 3, 1):Hour(1):DateTime(2001, 4, 15)
24 |         x = [month(a) for a in t]
25 |         w, y = monthlyagg(t, x)
26 |         @test length(y) == length(w) == 14
27 |         # all output midpoints must have day in 14, 15, 16
28 |         # irrespectively of month, because of how dates work.
29 |         # However, last month has half the days!
30 |         for i in 1:length(w)-1
31 |             @test 14 ≤ day(w[i]) ≤ 16
32 |         end
33 |         @test 7 ≤ day(w[end]) ≤ 8
34 |         @test y == month.(w)
35 |         # daily
36 |         t = Date(2001, 3, 1):Day(1):Date(2002, 1, 31)
37 |         x = [month(a) for a in t]
38 |         w, y = monthlyagg(t, x)
39 |         @test length(y) == length(w) == 11
40 |         # Mid point day depends on whether month has 30 or 31 days
41 |         # (no leap years on this span)
42 |         mids = [isodd(daysinmonth(a)) ? 16 : 15 for a in w]
43 |         @test day.(w) == mids
44 |         @test y == month.(w)
45 |     end
46 |     @testset "already monthly" begin
47 |         t1 = Date(2000, 3, 15):Month(1):Date(2005, 3, 31)
48 |         t2 = [Date(2003, 3, 15), Date(2004, 11, 2)]
49 |         for t in (t1, t2)
50 |             x = ones(length(t))
51 |             w, y = monthlyagg(t, x)
52 |             @test all(isequal(1), y)
53 |             @test w == t
54 |         end
55 |     end
56 |     @testset "actual averaging" begin
57 |         t = [Date(2001, 3, 1), Date(2001, 3, 2), Date(2001, 4, 1), Date(2001, 4, 2)]
58 |         x = [1, 2, 1, 2]
59 |         # output must have 2 months, each with the average of 1, 2, i.e., 1.5
60 |         w, y = monthlyagg(t, x)
61 |         @test length(y) == length(w) == 2
62 |         @test y[1] == y[2] == 1.5
63 |     end
64 |     @testset "different aggregations" begin
65 |         t = [Date(2001, 3, 1), Date(2001, 3, 2), Date(2001, 4, 1), Date(2001, 4, 2)]
66 |         x = [1, 3, 1, 3]
67 |         s2 = sqrt(2)
68 |         # std
69 |         w, y = monthlyagg(t, x; aggregator = std)
70 |         @test y[1] == y[2] == s2
71 |         # length
72 |         w, y = monthlyagg(t, x; aggregator = length)
73 |         @test y[1] == y[2] == 2
74 |         # negative fractions
75 |         x = [0.5, -0.5, 0.25, 0.1]
76 |         neg_fracts(vals) = count(v -> v < 0, vals)/length(vals)
77 |         w, y = monthlyagg(t, x; aggregator = neg_fracs)
78 |         @test y[1] == 0.5
79 |         @test y[2] == 0
80 |     end
81 | end
82 | 


--------------------------------------------------------------------------------
/block3_softwaredev/temporal_means_monthly_specific.jl:
--------------------------------------------------------------------------------
 1 | #=
 2 | This version shows the code that is not generalizable and could be
 3 | the typical way a scientist would write the solution to the exercise
 4 | =#
 5 | using Dates
 6 | using Statistics
 7 | 
 8 | function monthlymeans(t::AbstractVector{<:TimeType}, x::Vector; daynumber = 15)
 9 |     @assert issorted(t)
10 |     @assert daynumber ≤ 28
11 |     startdate = Date(year(t[1]), month(t[1]), daynumber)
12 |     finaldate = Date(year(t[end]), month(t[end]), daynumber)
13 |     m = startdate:Month(1):finaldate
14 |     output = average_over_same_months(t, x, m)
15 |     return m, output
16 | end
17 | 
# Average `x` over the consecutive entries of `t` that fall in the month of each
# date in `m`, and return one mean per entry of `m`. `t` is scanned front to
# back exactly once. A month of `m` without any sample in `t` yields `NaN`.
function average_over_same_months(t, x, m)
    output = zeros(length(m)) # output, monthly means of `x`
    first_index_in_month = 1
    for j in 1:length(m)
        current_month = month(m[j])
        # Define search range so that we start search from current index
        search_range = first_index_in_month:length(t)
        k = findfirst(i -> month(t[i]) ≠ current_month, search_range)
        if isnothing(k) # we didn't find any index with different month
            last_index_in_month = length(t)
        elseif k == 1
            # the very next sample already belongs to another month: there is
            # no data for this month, and the search position stays put
            output[j] = NaN
            continue
        else
            last_index_in_month = search_range[k - 1]
        end
        output[j] = mean(x[first_index_in_month:last_index_in_month])
        first_index_in_month = last_index_in_month + 1
    end
    return output
end
36 | 
37 | # Testing vectors
38 | t = Date(2015, 1, 1):Day(1):Date(2020, 12, 31)
39 | x = float.(month.(t))
40 | m, output = monthlymeans(t, x)
41 | 


--------------------------------------------------------------------------------
/block4_github/block4_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block4_github/block4_slides.pdf


--------------------------------------------------------------------------------
/block5_documentation/Documenter_GitHub_deploy.yaml:
--------------------------------------------------------------------------------
 1 | name: Documentation
 2 | # This is a standard Julia setup for GitHub CI
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main # update to match your main branch
 7 |     tags: '*'
 8 |   pull_request:
 9 | 
10 | jobs:
11 |   build:
12 |     permissions:
13 |       contents: write
14 |       pull-requests: read
15 |       statuses: write
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |       - uses: julia-actions/setup-julia@v2
20 |         with:
21 |           version: '1'
22 |       - uses: julia-actions/cache@v1
23 | 
24 | # Now this is where things focus on the documentation
25 | # build and deployment.
26 | # The next steps continue with the `doc` folder build.
27 | 
28 |       # This is really all you care about; 2-steps process
29 |       # step 1, assuming the documentation Project.toml file is in `docs` folder
30 |       # This works for the standard Julia package layouts (i.e. a Project.toml and
31 |       # a src/ directory at the top-level).
32 |       - name: Install docs dependencies
33 |         run: julia -e 'using Pkg; Pkg.develop(path="."); Pkg.instantiate()'
34 |         # If you are committing the docs/Manifest.toml file, to fully fix the package
35 |         # versions used by the documentation package environment, you can use this
36 |         # line instead. You just need to make sure that the package is a develop-dependency
37 |         # in the docs/Manifest.toml
38 |         #run: julia --project=docs/ -e 'using Pkg; Pkg.instantiate()'
39 |       # step 2 (note project path remains the same!)
40 |       - name: Build and deploy
41 |         env:
42 |           # An access token must be provided to "push" the documentation build.
43 |           # GitHub automates this via your account with this line:
44 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45 |           # See the discussion around "TagBot" in the Documenter.jl website:
46 |           # https://documenter.juliadocs.org/stable/man/hosting/#GitHub-Actions
47 |           # for automating creating tagged documentation builds after
48 |           # registering your package in the Julia General Registry
49 |         run: julia --project=docs/ docs/make.jl
50 | 


--------------------------------------------------------------------------------
/block5_documentation/block5_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block5_documentation/block5_slides.pdf


--------------------------------------------------------------------------------
/block5_documentation/block5_slides_juliacon2024.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block5_documentation/block5_slides_juliacon2024.pdf


--------------------------------------------------------------------------------
/block5_documentation/documentation_workshop_description.md:
--------------------------------------------------------------------------------
This workshop is all about documentation. It's one of the most important aspects of code and software: it makes your code accessible to others, makes their experience with it pleasant, makes them wanna keep using it for the long run, and lowers the threshold for them to become contributors! Whether you are a maintainer of several registered Julia packages, or just someone with an unregistered codebase that you believe can be useful for others, this workshop will be of use to you!
 2 | 
 3 | The workshop will be fully hands-on, and will be composed by several iterations of [lecture -> application] for the covered topics. The main things it will cover are (among others, and depending on the real-time flow):
 4 | 
 5 | 1. Core principles of what makes a good documentation.
 6 | 1. The skeleton of a good documentation: exceptional, unambiguous docstrings.
 7 | 1. The different depths of exposition approach to documentation.
 8 | 1. The introductory tutorial.
 9 | 1. Layouting documentation pages.
10 | 1. Tips & tricks for increasing clarity in the docs.
11 | 1. Intro to Documenter.jl: key syntax and functions.
12 | 1. Using Documenter.jl to generate runnable examples.
13 | 1. Using Documenter.jl to deploy documentation online on a GitHub repository.
14 | 1. Using DocumenterCitations.jl to cite literature robustly.
15 | 1. Tips for reducing maintenance burden.


--------------------------------------------------------------------------------
/block5_documentation/markdown_example.md:
--------------------------------------------------------------------------------
 1 | # Markdown example
 2 | Markdown is a way to style text by using plain text.
 3 | It was created by John Gruber in 2004. It is widely used on the web and is the best tool for creating documentation 😄
 4 | 
 5 | ## Basics
 6 | Headers denote sections. They are text prefaced with one to six `#` and then an empty space.
 7 | 
 8 | - Markdown can do lists like the one you read now with `-`
- Make text **bold** or *italic*, and can even do [links](https://www.google.com)
10 | 
11 | 1. Numbered lists, like the one you read now.
12 | 2. Tables, in-line `code`, code blocks, horizontal lines, and many more!
13 | 3. See https://www.markdownguide.org/ for more!
14 | 
15 | Here is a code block with Julia highlighting:
16 | ```julia
17 | rand()
18 | x = 1 + 1
19 | ```
20 | 
21 | This text is already highlighted in my VSCode editor!


--------------------------------------------------------------------------------
/block5_documentation/running_mean_documented.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | 
 5 | def running_mean(x, win_size):
 6 |     """Compute a running mean with a given window size.
 7 | 
 8 |     Parameters:
 9 |         x (ndarray): Input data.
10 |         win_size (int): Window size.
11 | 
12 |     Returns:
13 |         ndarray: Smoothened data.
14 |     """
15 |     return np.convolve(x, np.ones(w) / w, mode="valid")
16 | 
17 | 
def running_mean(x, win_size, win_type=np.ones):
    """Compute a weighted running mean with a given window size.

    Parameters:
        x (ndarray): Input data.
        win_size (int): Window size.
        win_type (callable): Called with `win_size`; must return an array of
            window weights. The weights are normalized to sum to one.

    Returns:
        ndarray: Smoothened data.
    """
    weights = win_type(win_size)
    kernel = weights / weights.sum()
    return np.convolve(x, kernel, mode="valid")
32 | 
33 | 
def running_mean(x, win_size, win_type=np.ones, aggregation=None):
    """Compute a weighted running aggregate (by default the mean) of the data.

    Parameters:
        x (ndarray): Input data.
        win_size (int): Window size.
        win_type (callable): Called with `win_size`; must return an array of
            window weights.
        aggregation (callable): Aggregates the weighted data within each
            window; it is called with ``axis=1``. When omitted, the function
            computes a running mean.

    Note:
        The default running mean is computed with a single convolution.
        When `aggregation` is passed, the data has to be explicitly grouped
        into windows, which is slower.

    Returns:
        ndarray: Smoothened data.
    """
    weights = win_type(win_size)

    if aggregation is not None:
        # Generic case: every window is materialized and aggregated explicitly,
        # which is slower. Hence the separate API (i.e. keyword) for it.
        windows = np.lib.stride_tricks.sliding_window_view(x, win_size)
        return aggregation(windows * weights, axis=1) / weights.mean()

    # Default case: a single convolution with the normalized weights.
    return np.convolve(x, weights / weights.sum(), mode="valid")
67 | 
68 | 
def main():
    """Plot a random signal together with its running mean for several windows."""
    window_functions = (np.ones, np.bartlett, np.blackman, np.hamming)

    # Fixed seed so the figure is the same on every run.
    np.random.seed(1)
    x = np.random.randn(256) + 2

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x, c="grey")
    for window in window_functions:
        smoothed = running_mean(x, win_size=16, win_type=window)
        ax.plot(smoothed, linewidth=2, label=window.__name__.capitalize())
    ax.legend()

    # Restrict the y-axis to the 5th-95th percentile range of the raw signal.
    lower, upper = np.percentile(x, [5, 95])
    ax.set_ylim(lower, upper)

    plt.show()


if __name__ == "__main__":
    main()
96 | 


--------------------------------------------------------------------------------
/block5_documentation/temporal_means_generalized_docstrings.jl:
--------------------------------------------------------------------------------
 1 | using Dates
 2 | using Statistics
 3 | 
 4 | """
 5 |     monthlymeans(t, x) → w, y
 6 | Given a time vector `t` and `x` some values corresponding to it,
 7 | average `x` over months, and return `w`, the new monthly-spaced time vector
 8 | and `y` the monthly-averaged `x`.
 9 | 
10 | See also [`monthlyagg`](@ref).
11 | """
12 | monthlymeans(t, x) = monthlyagg(t, x; agg = mean)
13 | 
"""
    monthlyagg(t, x; agg = mean) → w, y

Aggregate the values `x`, recorded at the times in the vector `t`, over months.
Return `w`, the new monthly-spaced time vector corresponding to the aggregates,
and `y`, the aggregated values.

The aggregation itself is performed by `agg`, a function that maps a vector of
values to a number.

This is [`temporal_aggregation`](@ref) called with `info = Dates.month`.
"""
monthlyagg(t, x; agg = mean) = temporal_aggregation(t, x; agg = agg, info = Dates.month)
27 | 
"""
    temporal_aggregation(t::AbstractVector{<:TimeType}, x::AbstractVector;
    agg = mean, info = Dates.month) → w, y

Calculate the temporally aggregated version of `x`, where it has been aggregated
over periods of time dictated by the `info` function and the time vector `t`.
Return `w, y` with `y` the aggregated values and `w` a new time vector
corresponding to `y`. `w` holds the temporal midpoint of each interval used.

`t` and `x` must have the same length, otherwise a `DimensionMismatch` is thrown.

`info` decides the intervals used for aggregation. All sequential values
of `t` that have the same `info` value belong to the same interval.
For each interval `x` is aggregated using `agg`.
Typical values of `info` are `Dates.year, Dates.month, Dates.day`.
You could define `summer(t) = month(t) ∈ (3,4,5,6,7,8)` and use `info = summer`
to find the ranges that correspond to successive "summers" and "winters".

`agg` is the aggregating function, e.g., `mean, std`. It receives a view of the
values of `x` that fall in one interval.

See also [`temporal_ranges`](@ref).
"""
function temporal_aggregation(t::AbstractVector{<:TimeType}, x::AbstractVector;
        agg = mean, info = Dates.month
    )
    # Without this check a longer `x` would be silently truncated.
    length(t) == length(x) || throw(DimensionMismatch(
        "`t` and `x` must have equal length, got $(length(t)) and $(length(x))."
    ))
    tranges = temporal_ranges(t, info)
    y = [agg(view(x, r)) for r in tranges]
    coarse_t = [middle_date(t[first(r)], t[last(r)]) for r in tranges]
    # TODO: We can have a `prettify_coarse_t` function to make
    # the time vector better in cases where it is possible,
    # e.g. like t[1]:Month(1):t[end]
    return coarse_t, y
end
58 | 
"""
    middle_date(t0, t1) → tm

Given two dates `t0, t1`, return the date that is approximately their midpoint.

Both inputs are converted to `DateTime` and the result is a `DateTime`.
Half of the time span is computed in whole milliseconds, rounded towards zero,
so a span with an odd number of milliseconds does not raise an `InexactError`.
"""
function middle_date(t0, t1)
    d0, d1 = DateTime(t0), DateTime(t1)
    # Integer division: `Millisecond` periods cannot hold fractional values.
    halfspan = Millisecond(div(Dates.value(d1 - d0), 2))
    return d0 + halfspan
end
64 | 
"""
    temporal_ranges(t::AbstractArray{<:TimeType}, info = Dates.month)

Return a vector of ranges so that each range of indices are consecutive values of `t`
that belong in either the same month, year, day, or other, depending on `info`.
See [`temporal_aggregation`](@ref) for more info on `info`.

`t` must be sorted, otherwise an `ArgumentError` is thrown.
An empty `t` yields an empty vector of ranges.
"""
function temporal_ranges(t::AbstractArray{<:TimeType}, info = Dates.month)
    # Explicit check instead of `@assert`, which is not meant for input validation.
    issorted(t) || throw(ArgumentError("Sorted time required."))
    r = Vector{UnitRange{Int}}()
    isempty(t) && return r
    L = length(t)
    i, x = 1, info(t[1]) # start index and `info` value of the current range
    for j in 2:L
        y = info(t[j])
        x == y && continue
        push!(r, i:(j-1))
        i, x = j, y
    end
    push!(r, i:L) # final range not included in for loop
    return r
end
85 | 
86 | 


--------------------------------------------------------------------------------
/block6_sciencereprod/block6_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JuliaDynamics/GoodScientificCodeWorkshop/cf0daf5912f0bccb08b3c7f994ab24221e9c83fe/block6_sciencereprod/block6_slides.pdf


--------------------------------------------------------------------------------