├── book
│   ├── .gitignore
│   ├── data_viz
│   │   ├── index.md
│   │   └── plotting_data.nim
│   ├── index.md
│   ├── external_language_integration
│   │   ├── nim_with_R.nim
│   │   ├── index.md
│   │   ├── julia
│   │   │   ├── basics.nim
│   │   │   ├── nimjl_conversions.nim
│   │   │   └── nimjl_arrays.nim
│   │   └── nim_with_py.nim
│   ├── basics
│   │   ├── index.md
│   │   ├── basic_plotting.nim
│   │   ├── units_basics.nim
│   │   ├── common_datatypes.nim
│   │   └── data_wrangling.nim
│   ├── numerical_methods
│   │   ├── index.md
│   │   ├── curve_fitting.nim
│   │   └── integration1d.nim
│   └── overview
│       └── index.md
├── .gitignore
├── nimib.toml
├── netlify.toml
├── .github
│   └── workflows
│       ├── pr_tasks.yml
│       ├── valid.yml
│       ├── docs.yml
│       └── pr_preview.yml
├── scinim_getting_started.nimble
├── getting_started.nim
├── README.md
└── LICENSE

/book/.gitignore:
--------------------------------------------------------------------------------
*.html
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
bin/
book/*.mustache
getting_started
.vscode/
docs/
book/book.json
--------------------------------------------------------------------------------
/book/data_viz/index.md:
--------------------------------------------------------------------------------
# Data visualization

This section is dedicated to data visualization in Nim.
--------------------------------------------------------------------------------
/book/index.md:
--------------------------------------------------------------------------------
# SciNim Getting Started

This is the SciNim getting-started book.

It's a collection of examples and code snippets to help people use the scientific libraries of the Nim programming language.
--------------------------------------------------------------------------------
/book/external_language_integration/nim_with_R.nim:
--------------------------------------------------------------------------------
import nimib, nimibook

nbInit(theme = useNimibook)

nbText: """
This section remains to be written. In the meantime, check out [rnim](https://github.com/SciNim/rnim).
"""

nbSave
--------------------------------------------------------------------------------
/book/basics/index.md:
--------------------------------------------------------------------------------
# Basics

This first chapter of getting-started is meant as an overview of the most fundamental aspects
of scientific computing in Nim.

It ranges from an introduction to the most commonly used data types to using data frames, creating
simple plots, and more.
--------------------------------------------------------------------------------
/nimib.toml:
--------------------------------------------------------------------------------
[nimib]
srcDir = "book"
homeDir = "docs"

[nimibook]
title = "SciNim Getting Started"
description = "Tutorials for libraries in the SciNim ecosystem"
git_repository_url = "https://github.com/SciNim/getting-started"
plausible_analytics_url = "scinim.github.io/getting-started"
--------------------------------------------------------------------------------
/book/external_language_integration/index.md:
--------------------------------------------------------------------------------
This section is a series of tutorials on how to interface with various programming languages.

C and C++ are not covered here, because C / C++ <-> Nim interop is already well documented in the manual and various tutorials.
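To give a flavor of how little tooling C interop needs, here is a minimal, hedged sketch (the `c_sqrt` name is our own; the `importc` pragma and `cdouble` type are standard Nim FFI):

```nim
# Bind C's sqrt from <math.h> directly; no wrapper library required.
proc c_sqrt(x: cdouble): cdouble {.importc: "sqrt", header: "<math.h>".}

echo c_sqrt(2.0)
```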

Therefore, this tutorial will focus on how to use the "classical" programming languages of scientific computing: Python, Julia and R.
--------------------------------------------------------------------------------
/book/numerical_methods/index.md:
--------------------------------------------------------------------------------
# Numerical methods

This tutorial will cover a wider range of topics. If you have trouble following, make sure
you have read the "Introduction" section.

In this section we will consider different numerical methods for performing calculations like
integration, interpolation and curve fitting.

Feel free to skip any section covering a topic you already know.
--------------------------------------------------------------------------------
/netlify.toml:
--------------------------------------------------------------------------------
[build]
# Directory that contains the deploy-ready HTML files and assets generated by
# the build. This is relative to the base directory if one has been set, or the
# root directory if a base has not been set.
publish = "docs/"

# The build command.
# It first downloads and installs choosenim, then adds nimble/bin to PATH,
# installs the nimble dependencies, and finally builds the book.
command = "curl https://nim-lang.org/choosenim/init.sh -sSf -o ./init.sh && sh ./init.sh -y && export PATH=/opt/buildhome/.nimble/bin:$PATH && nimble install -d -y && nim r -d:release getting_started.nim build"
--------------------------------------------------------------------------------
/.github/workflows/pr_tasks.yml:
--------------------------------------------------------------------------------
name: PR Tasks

on:
  pull_request_target:
    branches:
      - main

# This is executed in an unsafe environment, so no user code should be executed here!

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - run: echo Commit hash = ${{ github.event.pull_request.head.sha }}
      - name: Show pending status check
        uses: Sibz/github-status-action@v1.1.6
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          context: Netlify preview
          sha: ${{ github.event.pull_request.head.sha }}
          description: Waiting for build to finish...
          state: pending
--------------------------------------------------------------------------------
/.github/workflows/valid.yml:
--------------------------------------------------------------------------------
name: valid

on:
  pull_request:
    branches:
      - main

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - run: pip install scipy numpy
      - name: "install_nim"
        id: install_nim
        uses: iffy/install-nim@v4.1.1
      - run: nimble install -y
      - name: GenBook
        run: nimble genbook
      - run: echo Commit hash = ${{ github.event.pull_request.head.sha }}
      - uses: actions/upload-artifact@v2
        with:
          name: build-${{ github.event.pull_request.head.sha }}
          path: docs/
          retention-days: 1
          if-no-files-found: error
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
on:
  push:
    branches:
      - main

jobs:
  gh-docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - run: pip install scipy numpy
      - name: "install_nim"
        id: install_nim
        uses: iffy/install-nim@v3
      - name: install dependencies
        run: nimble install -y
      - name: Compile Book
        run: nim c getting_started.nim
      - name: Init Book
        run: ./getting_started init
      - name: Build Book
        run: ./getting_started build
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: docs
--------------------------------------------------------------------------------
/scinim_getting_started.nimble:
--------------------------------------------------------------------------------
# Package
version = "0.1.0"
author = "SciNim contributors"
description = "SciNim getting started examples"
license = "MIT"
skipDirs = @["book"]
bin = @["getting_started"]
binDir = "bin"

# Dependencies
requires "nim >= 1.2.0"
requires "nimib#head"
requires "nimibook#head"
requires "ggplotnim >= 0.5.1"
requires "datamancer >= 0.2.1"
requires "mpfit"
requires "numericalnim >= 0.7.1"
requires "unchained >= 0.1.9"
requires "benchy"
requires "scinim >= 0.2.2"
requires "nimpy >= 0.2.0"
requires "nimjl >= 0.6.3"


task genbook, "build book":
  exec("nimble build -d:release")
  exec("./bin/getting_started init")
  exec("./bin/getting_started build")
--------------------------------------------------------------------------------
/book/numerical_methods/curve_fitting.nim:
--------------------------------------------------------------------------------
import nimib, nimibook
import mpfit, ggplotnim

nbInit(theme = useNimibook)

nbText: """
# Curve fitting using [mpfit](https://github.com/Vindaar/nim-mpfit)

This section will cover a curve fitting example. It assumes you are familiar with the
data type and plotting introductions from the "Introduction" section. Here we will
combine this knowledge to perform a simple curve fit at the end.

With our acquired knowledge, we will now:
- read some data from a CSV file into a data frame
- perform a curve fit on the data using some function
- compute the fit result as a line
- draw a plot containing: input data + error bars, fit, fit results

## read csv

## fit function

## compute fit result

## plot data + fit results

"""

nbText: """
"""

nbSave
--------------------------------------------------------------------------------
/getting_started.nim:
--------------------------------------------------------------------------------
import nimibook

var book = initBookWithToc:
  entry("Introduction", "index.md")
  entry("Ecosystem overview", "overview/index.md")
  section("Basic topics", "basics/index.md"):
    entry("Common datatypes", "common_datatypes")
    entry("Data wrangling with dataframes", "data_wrangling")
    entry("Plotting", "basic_plotting")
    entry("Units", "units_basics")
  section("Numerical methods", "numerical_methods/index.md"):
    entry("Curve fitting", "curve_fitting")
    entry("Integration (1D)", "integration1d")
  section("Data visualization", "data_viz/index.md"):
    entry("Plotting data", "plotting_data")
  section("Interfacing with other languages", "external_language_integration/index.md"):
    entry("Nimpy - The Nim Python bridge", "nim_with_py")
    section("Nimjl - The Nim Julia bridge", "julia/basics"):
      entry("Advanced types", "nimjl_conversions")
      entry("Julia Arrays from Nim", "nimjl_arrays")
    entry("Interfacing with R", "nim_with_R")

nimibookCli(book)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
![workflow](https://github.com/SciNim/getting-started/actions/workflows/docs.yml/badge.svg)

# Tutorials on using packages in SciNim
Use ``nimble genbook`` to generate the tutorials.

## Contribution
We gladly accept relevant submissions in the form of ideas in issues or written articles in PRs.
If you have any questions, reach out to us on the Nim Science channel on Discord/Matrix/IRC.
### PR Previews
When you push a PR, the CI will build a preview which can be accessed through `Checks` and then clicking the `Details` link on the `Netlify Preview` item.

## Analytics

This website tracks analytics with [plausible.io](plausible.io), a lightweight and open-source website analytics tool with no cookies, fully compliant with GDPR, CCPA and PECR. Analytics for this website are publicly available: . You can opt out of analytics tracking [with standard ad-blocking](https://plausible.io/docs/excluding) or by typing [`localStorage.plausible_ignore=true`](https://plausible.io/docs/excluding-localstorage) in the browser console.
--------------------------------------------------------------------------------
/book/basics/basic_plotting.nim:
--------------------------------------------------------------------------------
import nimib, nimibook
import ggplotnim

nbInit(theme = useNimibook)

nbText: """
# Simple plotting using [ggplotnim](https://github.com/Vindaar/ggplotnim)

## Line plot
"""

nbCode:
  import ggplotnim
  let x1 = @[0.0, 1.0, 2.0, 3.0]
  let y1 = @[0.0, 1.0, 4.0, 9.0]
  let df1 = toDf(x1, y1)
  ggplot(df1, aes("x1", "y1")) +
    geom_line() +
    ggsave("images/line_plot.png")

nbImage("images/line_plot.png")

nbText: """
Every plot in `ggplotnim` is created from an input `DataFrame` using the `ggplot` procedure. The `aes`
argument describes the so-called "aesthetics", which are simply the columns of the `DataFrame` to
visualize. From here a plot is built up "by layers".
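
To make the layering idea concrete, here is a sketch that stacks both geoms shown in this chapter on the same data (it assumes the `df1` from the snippet above is in scope; the output filename is our own):

```nim
ggplot(df1, aes("x1", "y1")) +
  geom_line() +   # first layer: the connecting line
  geom_point() +  # second layer: markers drawn on top
  ggsave("images/line_and_points.png")
```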

Note: the `theme_opaque` is only used here to give the output PNG a white background so it is more
visible in this book when using dark themes.
"""

nbText: """## Scatter plot"""
nbCode:
  import ggplotnim
  let x2 = @[0.0, 1.0, 2.0, 3.0]
  let y2 = @[0.0, 1.0, 4.0, 9.0]
  let df2 = toDf(x2, y2)
  ggplot(df2, aes("x2", "y2")) +
    geom_point() +
    ggsave("images/scatter_plot.png")

nbImage("images/scatter_plot.png")

nbText: """
Notice how similar the two code snippets are. This is the general structure of `ggplotnim`: you add geoms and combine them in different ways.
Try for yourself what happens if you have *both* `geom_line` and `geom_point`.
"""

nbSave
--------------------------------------------------------------------------------
/.github/workflows/pr_preview.yml:
--------------------------------------------------------------------------------

name: PR Preview

on:
  workflow_run:
    workflows:
      - valid
    types:
      - completed

jobs:
  deploy:
    name: Deploy Preview
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - uses: potiuk/get-workflow-origin@v1_1
        id: source-run-info
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          sourceRunId: ${{ github.event.workflow_run.id }}
      - run: echo sourceHeadSha = ${{ steps.source-run-info.outputs.sourceHeadSha }}

      - name: Set env
        run: echo "GITHUB_SHA_SHORT=$(echo ${{ steps.source-run-info.outputs.sourceHeadSha }} | cut -c 1-20)" >> $GITHUB_ENV
      - run: echo Short hash = ${{ env.GITHUB_SHA_SHORT }}

      - name: Show pending status check
        uses: Sibz/github-status-action@v1.1.6
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          context: Netlify preview
          sha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
          description: Deploying site to Netlify. Please wait...
          state: pending

      - name: 'Download artifact'
        uses: actions/github-script@v3.1.0
        with:
          script: |
            var artifacts = await github.actions.listWorkflowRunArtifacts({
              owner: context.repo.owner,
              repo: context.repo.repo,
              run_id: ${{ github.event.workflow_run.id }},
            });
            console.log("Target artifact: " + "build-${{ steps.source-run-info.outputs.sourceHeadSha }}")
            var matchArtifact = artifacts.data.artifacts.filter((artifact) => {
              console.log("Found artifacts: " + artifact.name)
              return artifact.name == "build-${{ steps.source-run-info.outputs.sourceHeadSha }}"
            })[0];
            if (matchArtifact == undefined) {
              core.setFailed('Artifact not found!');
            }
            var download = await github.actions.downloadArtifact({
              owner: context.repo.owner,
              repo: context.repo.repo,
              artifact_id: matchArtifact.id,
              archive_format: 'zip',
            });
            var fs = require('fs');
            fs.writeFileSync('${{ github.workspace }}/docs.zip', Buffer.from(download.data));
      - run: rm -rf docs
      - run: unzip -d docs/ docs.zip

      - run: echo Deploy Alias = ${{ env.GITHUB_SHA_SHORT }}
      - uses: jsmrcaga/action-netlify-deploy@master
        with:
          NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
          NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
          deploy_alias: ${{ env.GITHUB_SHA_SHORT }}
          build_directory: docs/
          install_command: ls
          build_command: ls

      #- uses: geekyeggo/delete-artifact@v1
      #  with:
      #    name: build-${{ github.event.workflow_run.head_branch }}

      # Creates a status check with a link to the preview
      - name: Status check
        uses: Sibz/github-status-action@v1.1.6
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          context: Netlify preview
          description: Click link to preview ⇒
          sha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
          state: success
          target_url: https://${{ env.GITHUB_SHA_SHORT }}--scinim-getting-started.netlify.app
--------------------------------------------------------------------------------
/book/external_language_integration/julia/basics.nim:
--------------------------------------------------------------------------------
import nimib, nimibook

nbInit(theme = useNimibook)

nbText: """
# Using Julia with Nim

In this tutorial, we explore how to use [Nimjl](https://github.com/Clonkk/nimjl) to integrate [Julia](https://julialang.org/) code with Nim.

## What is Julia?

Julia is a dynamically typed scripting language designed for high performance; it compiles to efficient native code through LLVM.

Most notably, it has a strong emphasis on scientific computing, and Julia's Array types are among the fastest multi-dimensional array - or Tensor-like - data structures out there.

## Why use Julia inside Nim?

* Extending the Nim ecosystem with Julia's scientific packages
* As an efficient scripting language in a compiled application

# Tutorial

[Nimjl](https://github.com/Clonkk/nimjl) already has some [examples](https://github.com/Clonkk/nimjl/examples/) that explain the basics; make sure to go through them in order.

## Basic stuff

Nimjl is mostly a Nim wrapper around the C API of Julia, plus some syntactic sugar to make it easier to use. That means that, inherently, Nimjl is limited to the capabilities of Julia's C API.

Now let's see what the code looks like:
"""
nbCode:
  import nimjl
  Julia.init() # Must be done once in the lifetime of your program

  discard Julia.println("Hello world !") # Invoke the println function from Julia. This function returns a nil JlValue

  Julia.exit() # -> This call is optional since it's called at the end of the process, but making the exit explicit makes code more readable.
  # All successive Julia calls after the exit will probably segfault

nbText: """The ``Julia.init()`` call initializes the Julia VM. No call before the init will work.

The ``Julia.exit()`` call is optional, since it's added as an exit proc (see [std/exitprocs](https://nim-lang.org/docs/exitprocs.html)).

Internally, this is rewritten to:
"""

nbCode:
  discard jlCall("println", "Hello world !")

nbText: """Both snippets are identical; as mentioned above, ``Julia.`` is syntactic sugar for calling Julia functions. It always returns a JlValue (which can be nil if the Julia function does not return anything).

The equivalent C code would be:
```c
jl_function_t *func = jl_get_function(jl_base_module, "println");
jl_value_t *argument = jl_eval_string("Hello world !");
jl_call1(func, argument);
```

"""

nbText:"""As mentioned, Julia is **dynamically typed**, which means that from Nim's point of view, every Julia object is a pointer to the C struct ``jl_value_t`` - mapped in Nim to ``JlValue``.

### Converting Nim types to Julia values

Most Nim values can be converted to a JlValue through the function ``toJlVal`` or its alias ``toJlValue`` (I always got the two names confused, so I ended up defining both...).

When passing a Nim value as a Julia argument through ``jlCall`` or ``Julia.myfunction``, Nim will automatically convert the argument by calling ``toJlVal``.

Let's see in practice what that means:
"""

nbCode:
  import nimjl
  Julia.init()
  var res = Julia.sqrt(255.0)
  echo res

  echo typeof(res)
  echo jltypeof(res)

nbText: """
**This operation will perform a copy** (almost always).
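
As a sketch of what that copy semantics implies, here is an explicit round trip using the conversion procs shown in this section (a hedged example of our own, assuming the Julia VM is initialized):

```nim
let jlv = toJlVal(255.0)   # Nim -> Julia: copies the value into a JlValue
let back = jlv.to(float64) # Julia -> Nim: copies it back out
assert back == 255.0       # the round trip preserves the value
```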

For reference, the equivalent C code would be:
```c
jl_function_t *func = jl_get_function(jl_base_module, "sqrt");
jl_value_t *argument = jl_box_float64(255.0);
jl_value_t *ret = jl_call1(func, argument);
double cret = jl_unbox_float64(ret);
printf("cret=%f \n", cret);
```

### Converting from Julia to Nim

In the previous example we calculated the square root of 255.0, stored in a JlValue. But using a JlValue in Nim is hardly practical, so let's see how to convert it back to a float:

"""

nbCode:
  var nimRes = res.to(float64)
  echo nimRes
  echo typeof(nimRes)
  import std/math
  # Check the result
  assert nimRes == sqrt(255.0)


nbText:"""
For convenience:
* ``proc jltypeof(x: JlVal): string`` will invoke the Julia function ``typeof`` and convert the result to a string.
* ``proc `$`(x: JlVal): string`` will call the Julia function ``string`` and convert the result to a string - this allows us to call ``echo`` with a JlValue and obtain the same output as Julia's ``println``.

Keep these procs in mind, as they will often be used in the following examples.
"""

nbSave
--------------------------------------------------------------------------------
/book/external_language_integration/julia/nimjl_conversions.nim:
--------------------------------------------------------------------------------
import nimib, nimibook
import std/os

nbInit(theme = useNimibook)

nbText: """Previously, we've seen how the basics of Nimjl work; now let's explore how to work with non-trivial types when calling functions.

## Julia Dict() and Nim Table[U, V]

The Julia Dict() type will be mapped to ``Table`` in Nim and vice versa. A copy is performed.

"""

nbCode:
  import nimjl
  Julia.init()

nbCode:
  var nimTable: Table[int64, float64] = {1'i64: 0.90'f64, 2'i64: 0.80'f64, 3'i64: 0.70'f64}.toTable
  block:
    var r = Julia.`pop!`(nimTable, 1)
    echo r
    echo nimTable # The initial object has not been modified, because a copy is performed when passing a Nim type to a Julia call.

nbText: """As you can see, the Nim object here has not been modified despite a value being pop'ed.

So if you want to handle the value, it is best to first convert the Table, then modify it.

"""

nbCode:
  var jlDict = toJlVal(nimTable)
  block:
    var r = Julia.`pop!`(jlDict, 1)
    echo r
    echo jlDict

nbText: """The key ``1`` has effectively been removed.

Note that you can also use the ``[]`` and ``[]=`` operators on Dict().
"""

nbCode:
  block:
    var r = jlDict[2]
    echo r
    jlDict[2] = -1.0
    echo jlDict

nbText:"""## Tuples

Julia named tuples will be mapped to Nim named tuples. Note that since Nim tuple types are defined at compile time while Julia tuples can be made at run-time, using tuples is not always trivial.
The key is to know the fields of the tuple beforehand, in order to easily use it from Nim. A copy is always performed.

## Object

For objects, the conversion is done by iterating over the object's fields and calling the conversion proc on each.

In order for the conversion to be possible, it is necessary that the type is declared in both Nim and Julia (as a mutable struct) and that the empty constructor is defined in Julia.

If the type is not known to Julia, the Nim object will be mapped to a NamedTuple (losing the mutability).

Let's see how it works in practice. First we will have to create a local module and include it.

"""

nbFile("mymod.jl"):"""
module localexample
mutable struct Foo
  x::Int
  y::Float64
  z::String
  # Nim initializes the Julia variable with the empty constructor by default
  Foo() = new()
  Foo(x, y, z) = new(x, y, z)
end
function applyToFoo(foo::Foo)
  foo.x += 1
  foo.y *= 2/3
  foo.z *= " General Kenobi !"
end
export Foo
export applyToFoo
end
"""

nbText: """Now that we have our local Julia module, let's include it and convert objects to Nim.
"""

nbCode:
  # Create the same Foo type as the one defined in Julia
  type
    Foo = object
      x: int
      y: float
      z: string

  # Include the file
  jlInclude(getCurrentDir() / "mymod.jl")
  # This is equivalent to Julia's `using ...`
  jlUseModule(".localexample")

nbText: """Now let's see how conversion works for objects:

"""

nbCode:
  var foo = Foo(x: 144, y: 12.0, z: "123")
  var jlfoo = toJlVal(foo)
  echo jlfoo
  echo typeof(jlfoo) # From Nim's point of view it's still a JlValue
  echo jltypeof(jlfoo) # From Julia's point of view, it's a Foo object.

nbText: """The object gets converted to the mutable struct type "Foo" in Julia.

Despite being a JlValue for Nim, you can still access and modify its fields using `.` - just as you would with a Nim object.

Internally, this will call Julia's metaprogramming functions getproperty / setproperty!.

Let's see:
"""

nbCode:
  echo jlfoo.x
  echo typeof(jlfoo.x)
  echo jltypeof(jlfoo.x)

  jlfoo.x = 20
  jlfoo.y = -11.0
  jlfoo.z = "Hello there !"
  echo jlfoo

nbText: """
And like any JlValue, it can be used as a function argument. For example, let's call the function ``applyToFoo`` we previously defined in Julia.

This function adds 1 to the x field, multiplies the y field by 2/3, and appends the string " General Kenobi !" to the z field.
"""

nbCode:
  discard Julia.applyToFoo(jlfoo)
  echo jlfoo

nbText: """And there we have it. ``jlfoo`` has been modified.

Finally, let's convert the Julia object back to a Nim "Foo" object:

"""

nbCode:
  var foo2 = jlfoo.to(Foo)
  echo foo2
  echo typeof(foo2)

nbText: """foo2 is now back in Nim land with the previously modified values.

Of course, this dummy example doesn't do much, but it demonstrates the type of workflow you can set up between Nim and Julia.

Next, let's talk about arrays.
"""

removeFile(getCurrentDir() / "mymod.jl")

nbSave
--------------------------------------------------------------------------------
/book/external_language_integration/nim_with_py.nim:
--------------------------------------------------------------------------------
import nimib, nimibook
import std/os

nbInit(theme = useNimibook)

nbText: """
# Using Python with Nim

In this tutorial, we explore how to use [Nimpy](https://github.com/yglukhov/nimpy) to integrate Python code with Nim.

There are 2 potential motivations for using Python:
* Extending the Nim scientific computing ecosystem, mostly with Scipy / Numpy.
* Having a scripting language inside a compiled application

There is a third use case: implementing Python modules in Nim (for example, to speed up Python). While potentially useful, we will not cover it in this tutorial, but you can check out
[nimporter](https://github.com/Pebaz/Nimporter).

## Using Python as a scripting language in Nim
"""

nbCode:
  import nimpy
  let py = pyBuiltinsModule()
  discard py.print("Hello world from Python..")

nbText: """
That's basically all there is to it.
If you don't want to use the dot operator (which can get confusing), it is also possible to use ``callMethod`` directly:
"""

nbCode:
  discard callMethod(py, "print", "This is effectively identical to the previous call")

nbText: """
Most Nim -> Python type conversions are done automatically through Nimpy templates. Python -> Nim type conversions have to be invoked manually with the ``to()`` API.

Let's see how it works in practice. In order to do that, we are going to create a local Python file with our custom functions, import it in Nim, call the Python function from Nim and convert the result back to Nim types.

The next portion will create said Python file using Nim code. If you're looking to reproduce this tutorial at home, you can (and probably should) do it using your favorite text editor.
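
Before wiring it up through Nim, it can help to preview what the function we are about to put in `mymod.py` does in plain Python (this is only a cross-check; the dict literal is our own test input, mirroring the Nim object used below):

```python
# Pure-Python preview of mymod.myfunc: halve the float, reverse the
# string, and sort the sequence.
def myfunc(inputArg):
    outputArg = {}
    outputArg["argFloat"] = inputArg["argFloat"] / 2
    outputArg["argStr"] = inputArg["argStr"][::-1]
    outputArg["argSeq"] = sorted(inputArg["argSeq"])
    return outputArg

res = myfunc({"argFloat": 36.66, "argStr": "I'm a string",
              "argSeq": [6, 3, 4, 2, 7, 1, 8, 5]})
print(res["argStr"])  # gnirts a m'I
print(res["argSeq"])  # [1, 2, 3, 4, 5, 6, 7, 8]
```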
"""

nbFile("mymod.py"):"""
def myfunc(inputArg):
    outputArg = {}
    outputArg["argFloat"] = inputArg["argFloat"] / 2
    outputArg["argStr"] = inputArg["argStr"][::-1]
    sortedList = sorted(inputArg["argSeq"])
    outputArg["argSeq"] = sortedList
    return outputArg
"""

nbText: """
Now, onto the good parts:
"""

nbCode:
  type
    MyObj* = object
      argFloat: float
      argStr: string
      argSeq: seq[int]

  let
    nimSeq: seq[int] = @[6, 3, 4, 2, 7, 1, 8, 5]
    nimTup = MyObj(argFloat: 36.66, argStr: "I'm a string", argSeq: nimSeq)

  # Let's import our Python file.
  # First, add the location of the Python file to sys.path, as you would do in Python
  let sys = pyImport("sys")
  discard sys.path.append(getCurrentDir())
  # Second, import your file
  let mymod = pyImport("mymod")

  let retValue = mymod.myfunc(nimTup)
  echo typeof(retValue)
  # We can still use retValue as an argument for a Python function
  discard py.print(retValue)

nbText: """
As you can see, by default every Python function call returns a PyObject. To convert this PyObject into a useful Nim type, simply do:
"""

nbCode:
  let nimValue = retValue.to(MyObj)
  echo typeof(nimValue)
  echo nimValue

nbText: """
Note that this example works with an object, but most Nim data structures are convertible to PyObject through Nimpy, including (but not limited to): `Table, JsonNode, Set, OpenArray, Enum, Tuple` etc.

## Extending Nim through Scipy & Numpy

Now that we know how to use Python through Nim, let's see how we can use Scipy / Numpy scientific functions from Nim.

The main difficulty is working with the numpy ndarray type in Nim.

In order to do that, we'll use the [scinim/numpyarrays API](https://github.com/SciNim/scinim/blob/main/scinim/numpyarrays.nim).
By default, the conversion is done from/to Arraymancer Tensors, but the API covers ``ptr UncheckedArray[T]``, so it can be extended to any type with an underlying data buffer.

"""

nbCode:
  import arraymancer
  import scinim/numpyarrays

  let np = pyImport("numpy")
  # Create a Tensor
  var mytensor = @[
    @[1.0, 2.0, 3.0],
    @[4.0, 5.0, 6.0],
    @[7.0, 8.0, 9.0],
  ].toTensor

  # As you can see, Tensors are converted automatically to np.ndarray
  discard py.print(mytensor)
  var myarray = toNdArray(mytensor)
  echo myarray.dtype()
  echo myarray.shape

nbText: """
Now let's do a simple 1D interpolation using Scipy.

For simplicity, let's use a simple, straightforward function: $$f(x) = 10*x$$ and do a linear 1D interpolation. This makes the result easy to verify.
"""

nbCode:
  let interp = pyImport("scipy.interpolate")
  var
    mypoints = @[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ].toTensor
    myvalues = @[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, ].toTensor
    x_coord = toNdArray(mypoints)
    y_coord = toNdArray(myvalues)

  var f_interp = interp.interp1d(x_coord, y_coord, "linear")
  discard py.print(f_interp)

nbText: """
The result of interp1d is an interpolator function. In Nim, it's necessary to call it explicitly using the ``callObject`` proc.
"""

nbCode:
  # The result of interp1d is a function object that can be called through __call__
  var val_at_new_point = callObject(f_interp, 1.5).to(float)
  # Yay, we just did a linear 1D interpolation !
  echo val_at_new_point

nbText: """
As expected, the result of the linear 1D interpolation evaluated at the coordinate 1.5 is 15.
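
The arithmetic behind that result can be cross-checked without SciPy; a small pure-Python linear interpolator (our own helper, not part of Nimpy or SciPy) reproduces the same numbers over the same grid:

```python
# Hand-rolled linear interpolation over the grid of f(x) = 10 * x.
def lerp1d(xs, ys, x):
    # Locate the bracketing interval and interpolate linearly inside it.
    for i in range(len(xs) - 1):
        if xs[i] <= x <= xs[i + 1]:
            t = (x - xs[i]) / (xs[i + 1] - xs[i])
            return ys[i] + t * (ys[i + 1] - ys[i])
    raise ValueError("x outside interpolation range")

xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
ys = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0]
print(lerp1d(xs, ys, 1.5))  # 15.0
```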
149 | 150 | Now let's do it on an array: 151 | """ 152 | 153 | nbCode: 154 | var new_points_coord = @[2.5, 3.5, 4.5, 5.5] 155 | var new_values = callObject(f_interp, new_points_coord).toTensor[:float]() 156 | echo new_values 157 | 158 | nbText: """ 159 | The result of the linear 1D interpolation on `[2.5, 3.5, 4.5, 5.5]` is `[25, 35, 45, 55]`, as we expected! 160 | 161 | 162 | (if you executed the code snippet as-is, don't forget to remove the generated Python file ``mymod.py``). 163 | """ 164 | 165 | removeFile(getCurrentDir() / "mymod.py") 166 | 167 | nbText: """ 168 | And that's it for this tutorial! 169 | 170 | While simple, the approach presented here allows us to reuse most (if not all) of the Scipy / Numpy API relying on ndarrays and to convert the results to Arraymancer Tensors, a format easily used in Nim. 171 | """ 172 | 173 | nbSave 174 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work").
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /book/basics/units_basics.nim: -------------------------------------------------------------------------------- 1 | import nimib, nimibook 2 | 3 | nbInit(theme = useNimibook) 4 | nb.useLatex 5 | 6 | nbText: """ 7 | # Using and checking units with [Unchained](https://github.com/SciNim/Unchained) 8 | 9 | Units are crucial in physics; without them we wouldn't have any consistent way of comparing two measurements. 10 | You don't solve an equation without checking units afterwards as a safety-check. But when it comes to coding 11 | physical formulas, units are often ignored or, in the best case, relegated to a comment somewhere. This can be solved by using 12 | unit libraries like `Unchained`.
What sets `Unchained` apart from many other libraries is that it does the majority 13 | of the work at compile-time, so you know that if the code compiles, the units are correct. 14 | 15 | Many other libraries check this at runtime, and they only complain when you run the piece of code you want to check 16 | the units of. As an added bonus, doing most of the work when compiling the code will make the code run faster, as at most 17 | a conversion factor will be injected. 18 | 19 | ## Motivating example 20 | 21 | One example of a real disaster caused by conversion errors is the Mars Climate Orbiter that NASA sent to the red planet in 1999. 22 | It crashed. Why? Because the navigators at JPL used metric units while the manufacturers had used imperial units. So when 23 | the navigators thought they read 1 N⋅s from the craft's sensors it was in fact 1 lbf⋅s. 24 | 25 | As an introduction to `Unchained`, let's find out what 1 lbf⋅s is in N⋅s to understand how much of a difference it made. 26 | """ 27 | nbCode: 28 | import unchained 29 | 30 | ## Define composite units 31 | defUnit(N•s) 32 | defUnit(lbf•s) 33 | 34 | ## Assign variables 35 | let lbfs = 1.lbf•s 36 | let Ns = lbfs.to(N•s) 37 | 38 | echo lbfs, " equals ", Ns 39 | 40 | nbText: """ 41 | As you can see, they thought the value they read was 4.4 times smaller than it really was! 42 | 43 | But what does the code do? Let's dissect it block-by-block: 44 | 45 | 1. `defUnit(N•s)` - There is a ridiculous number of combinations of base units. 46 | Therefore you must define composite units manually before you use them. More on this in a later section. 47 | 2. `let lbfs = 1.lbf•s` - We use the dot (`.`) to assign a unit to a number. In this case we assign a variable with the value "1 lbf•s". 48 | 3. `let Ns = lbfs.to(N•s)` - We use the `to` proc to convert a variable from one unit to another.
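If you want to double-check that factor without `Unchained`, the conversion is plain arithmetic. Both constants below are exact by definition (1 lb = 0.45359237 kg, and standard gravity is 9.80665 m/s²):

```python
lb_in_kg = 0.45359237      # exact definition of the avoirdupois pound
g0 = 9.80665               # standard gravity in m/s^2, exact by definition

# 1 lbf is the force exerted by one pound under standard gravity:
lbf_in_N = lb_in_kg * g0
print(round(lbf_in_N, 4))  # 4.4482
```

So 1 lbf⋅s is roughly 4.448 N⋅s, matching the factor of ~4.4 above.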
49 | """ 50 | 51 | nbText: """ 52 | ## Defining and assigning units 53 | 54 | The first thing you will probably want to do is to associate a value with a unit, for example 1 Newton. There are multiple ways 55 | to specify a unit, both short and long versions: 56 | """ 57 | 58 | nbCodeInBlock: 59 | ## Short version 60 | let n1 = 1.N 61 | ## Long version 62 | let n2 = 1.Newton 63 | ## Short version with prefix (milli) 64 | let n3 = 1.mN 65 | ## Long version with prefix 66 | let n4 = 1.MilliNewton 67 | 68 | nbText: """ 69 | That was easy enough, and being able to use prefixes makes for a saner user experience. Forgetting to multiply by the 70 | correct prefix factor is a very common mistake after all. This is solved by including prefixes directly in the library, 71 | so you don't have to deal with them manually. 72 | 73 | When it comes to composite types it gets a bit more complicated though. For example, if we want to use a unit `kg•m•s⁻¹` we 74 | have a few more things to consider. First and foremost, composite types must either be defined in a `defUnit` or be used in a 75 | dot expression (e.g. `10.kg•m•s⁻¹`) *before* they can be used in other parts of the code. Here are a few valid and invalid cases: 76 | """ 77 | 78 | nbCodeInBlock: 79 | ## Correct way of doing it! 80 | defUnit(kg•m•s⁻¹) 81 | proc unitProc(k: kg•m•s⁻¹) = 82 | echo k 83 | 84 | nbText: """ 85 | ```nim 86 | ## Incorrect way of doing it! Missing `defUnit` 87 | proc unitProc(k: kg•m•s⁻¹) = 88 | echo k 89 | ``` 90 | """ 91 | 92 | #[ 93 | nbCodeInBlock: 94 | ## Correct way of doing it! 95 | let a = 5.kg•m•s⁻¹ 96 | proc unitProc(k: kg•m•s⁻¹) = 97 | echo k 98 | 99 | nbText: """ 100 | ```nim 101 | ## Incorrect way of doing it! Missing first use in `.` 102 | proc unitProc(k: kg•m•s⁻¹) = 103 | echo k 104 | ``` 105 | """ 106 | 107 | ]# 108 | 109 | nbText: """ 110 | There is also the `UnitLess` type, which represents a quantity without a unit, such as a count or a percentage.
111 | It is used like the other units, with the addition that `UnitLess` numbers can be passed to procs accepting `float`. 112 | This is done through converters behind the scenes, so you don't have to manually convert between the two: 113 | """ 114 | 115 | nbCodeInBlock: 116 | proc f(x: float): float = x*x + x + 1 117 | let ul = 100.UnitLess 118 | echo f(ul) 119 | ## This will fail because `x` isn't UnitLess: 120 | ## let x = 100.kg 121 | ## echo f(x) 122 | 123 | nbText: """ 124 | To get the unit of a variable you can use `typeof`, and checking units is done using `is`: 125 | """ 126 | 127 | nbCodeInBlock: 128 | let mass = 10.kg 129 | echo typeof(mass) is kg 130 | 131 | nbText: """ 132 | ### Different ways to write units 133 | As you might have noticed, we used a few unicode characters in the code above (•, ⁻¹). Most keyboards don't have these symbols on them, but 134 | there are ways to work around that. On Linux you could check if your distro supports the "Compose key", which lets you use sensible 135 | key combinations to type symbols. For example `²` can be written using `Compose + ^ + 2`; it makes sense! 136 | 137 | On Windows there is WinCompose, which tries to emulate the compose key. There is also the Emoji/Symbols popup menu 138 | when you press `Windows + .`, where you can find a multitude of symbols. 139 | 140 | If you don't want to use these kinds of (totally awesome) tools, 141 | you can also write the types in backticks (``` ` ```) and use `*` and `^` 142 | instead (`/` is not allowed, use negative exponents instead). 143 | Here are a few examples of equivalent ways of writing the same unit: 144 | """ 145 | 146 | nbCodeInBlock: 147 | let unicodeUnit = 1.kg•m•s⁻¹ 148 | let textUnit = 1.`kg*m*s^-1` 149 | echo unicodeUnit == textUnit 150 | 151 | nbCodeInBlock: 152 | let unicodeUnit = 1.N•s 153 | let textUnit = 1.`N*s` 154 | echo unicodeUnit == textUnit 155 | 156 | nbText: """ 157 | ### Arithmetic with units 158 | Units can of course be used in formulas.
The supported operators are `+`, `-`, `*`, `/` and `^` (Note: `^` only works with integer exponents!). 159 | You don't have to `defUnit` the resulting unit of an operation, it's done automatically. 160 | There are a few rules worth remembering, some more obvious than others: 161 | 162 | - You can only add and subtract units of the same quantity (hopefully pretty obvious). 163 | Example: `1.kg + 1.lbs` works. `1.kg + 1.m` doesn't work. 164 | - You can multiply and divide units of different quantities. Example: `1.kg * 1.m` works. 165 | - If it is ambiguous what the resulting unit should be, the operands are converted to base SI units. Example: `1.kg + 1.lbs` will result in `1.45359 KiloGram`. 166 | - Conversion from prefixed to non-prefixed units only happens if multiple different units or prefixes of the same quantity are mixed. 167 | Example: `1.mm * 1.kg` results in `1 KiloGram•MilliMeter` while `1.mm * 1.cm` results in `1e-005 Meter²`. 168 | - Division of same quantities gives a `UnitLess` result. Example: `1.cm / 1.m` gives `0.01 UnitLess`. 169 | 170 | Let's see some actual code now! You are probably familiar with the harmonic oscillator: 171 | $$y(t) = A \cos (ω t + φ)$$ 172 | where $ω$ is the angular frequency in $rad ⋅ s^{-1}$, $φ$ is an angle and $A$ is the maximum amplitude in $cm$. 173 | Let's get coding now!
174 | """ 175 | 176 | nbCode: import math 177 | nbCodeInBlock: 178 | proc y(t: Second): CentiMeter = 179 | let ω = 100.rad•s⁻¹ ## or 100.`rad*s^-1` 180 | let A = 10.cm 181 | let φ = Pi.rad 182 | let argument = ω * t + φ 183 | result = A * cos(argument) 184 | 185 | echo y(0.s) 186 | 187 | 188 | 189 | nbSave 190 | -------------------------------------------------------------------------------- /book/basics/common_datatypes.nim: -------------------------------------------------------------------------------- 1 | import nimib, nimibook 2 | 3 | nbInit(theme = useNimibook) 4 | 5 | nbText: """ 6 | # Basic data types encountered in scientific computing in Nim 7 | 8 | Most operations using scientific computing packages in Nim will require one of three 9 | different data types: 10 | - `seq[T]` 11 | - `Tensor[T]` 12 | - `DataFrame` 13 | 14 | to store multiple scalar values (typically `float` values). 15 | 16 | `T` is the typical letter used to indicate generics in Nim. This means the explicit type 17 | will be determined by the argument / desired type to store in the container, for example 18 | `seq[int]`, `Tensor[float]`, etc. 19 | 20 | There are of course further types used in many packages, but these three are typically 21 | used to actually store data. Other objects may wrap any of these for different purposes, 22 | e.g. [numericalnim](https://github.com/hugogranstrom/numericalnim) contains different 23 | helper objects for integration or interpolation. 24 | 25 | We will now look at each of these three data types individually and discuss how to create 26 | variables of each type and what typical use cases are. 27 | 28 | ## `seq[T]` - homogeneous, dynamically resizable sequence 29 | 30 | `seq[T]` is the default, dynamically resizable container from Nim's standard library. As the 31 | single generic argument `T` implies, it is homogeneous, which means one sequence stores 32 | elements of a single data type.
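If you are coming from Python, a useful mental model is that `seq[T]` behaves like the standard library's homogeneous `array` type rather than a plain `list`: a wrongly-typed element is rejected (at runtime in Python, at compile time in Nim). A comparison sketch:

```python
from array import array

# 'd' means C double: a homogeneous container of floats,
# roughly analogous to a Nim seq[float].
a = array('d', [0.0, 1.0, 2.0])
a.append(3.0)                  # fine: same element type

caught = False
try:
    a.append("hello")          # wrong type: rejected
except TypeError:
    caught = True
print(caught)  # True
```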
33 | 34 | Their implementation is essentially a pointer to a memory array, the length of the allocated 35 | memory, as well as the number of elements actually stored in it. We will discuss this further 36 | down in the section "Length and capacity of a sequence". 37 | 38 | In addition to `seq[T]` Nim also supports fixed size arrays. While these can be very useful, 39 | they won't be discussed here. 40 | 41 | The standard library provides different ways to construct a sequence. Let's look at the 42 | default two constructors first: 43 | """ 44 | 45 | nbCodeInBlock: 46 | let x = @[0.0, 1.0, 2.0, 3.0] 47 | echo x 48 | echo "Length: ", len(x) 49 | nbText: """ 50 | The first constructor explicitly converts an array of elements into a sequence with 51 | 4 elements. The length of the sequence can be accessed using `len`. 52 | """ 53 | nbCodeInBlock: 54 | var x = newSeq[float]() 55 | echo "Length: ", x.len 56 | nbText: """ 57 | The second way to construct a sequence uses the `newSeq` procedure. It receives the 58 | generic type that should be housed in the sequence and as an argument the number of 59 | initial elements (the default being 0). 60 | """ 61 | nbCodeInBlock: 62 | var x = newSeq[float](4) 63 | echo x 64 | echo "Length: ", x.len 65 | nbText: """ 66 | `x` then uses the `newSeq` constructor to directly construct a sequence of floats of 67 | length 4. All elements in the sequence are initialized to zero! 68 | 69 | From here we can modify any created sequence, remove elements or add new elements as 70 | long as the variable is declared as a `var` (instead of `let`).
71 | 72 | ### Access 73 | 74 | Elements in the sequence are accessed using bracket `[]` access: 75 | """ 76 | nbCodeInBlock: 77 | let x = @[0.0, 1.0, 2.0, 3.0] 78 | echo x[2] 79 | 80 | nbText: """ 81 | ### Mutation 82 | 83 | Basic mutation of elements in the sequence is done using `[]=` (in Nim terms), which is simply 84 | bracket access and an assignment: 85 | """ 86 | nbCodeInBlock: 87 | var x = newSeq[float](4) 88 | x[0] = 5.0 89 | echo x 90 | nbText: """ 91 | 92 | New elements are added using `add` as is typical in Nim: 93 | """ 94 | nbCodeInBlock: 95 | var x = newSeq[float]() 96 | x.add 10.0 97 | echo x 98 | echo "Length: ", x.len 99 | nbText: """ 100 | So `x` now contains 1 element instead of 0. 101 | 102 | Deleting elements is also supported, via `delete` or `del`. Both procedures take the index 103 | to be removed. `delete` keeps the order of the sequence intact, whereas `del` simply overwrites 104 | the given index with the last element of the sequence and reduces the length by one. Compare: 105 | """ 106 | nbCodeInBlock: 107 | let x1 = @[0.0, 1.0, 2.0, 3.0] 108 | var x2 = x1 109 | var x3 = x1 110 | echo "Starting from: ", x1 111 | x2.delete(1) 112 | echo "Remove index 1 using `delete`: ", x2 113 | x3.del(1) 114 | echo "Remove index 1 using `del`: ", x3 115 | nbText: """ 116 | See how the order of `x3` is now different, whereas `x2` has the same order just with 117 | index 1 removed. 118 | 119 | ### Length and capacity of a sequence 120 | 121 | Consider the following code: 122 | """ 123 | nbCodeInBlock: 124 | var x = newSeq[int]() 125 | for i in 0 ..< 10: 126 | x.add i 127 | nbText: """ 128 | A naive implementation of a sequence would have to reallocate the memory underlying the sequence for 129 | each call to `add`. To avoid the overhead of all these copying operations, the implementation 130 | overallocates by a certain amount. This means reallocation is only required if the actual underlying 131 | capacity is exceeded.
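Python lists use the same over-allocation strategy, which makes the effect easy to observe from the interpreter (the exact sizes are CPython implementation details, so we only check that the allocation changes far less often than we append):

```python
import sys

xs = []
sizes = set()
for i in range(100):
    xs.append(i)
    sizes.add(sys.getsizeof(xs))  # tracks the allocated size, not the length

# 100 appends, but only a handful of distinct allocation sizes:
print(len(xs), len(sizes))
```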
132 | 133 | This has practical use cases as well. Sometimes we may not know *exactly* how many elements we will 134 | store in a sequence, but we have a good idea of the order. In those cases we cannot very well create 135 | a sequence with an existing length using `newSeq` (if we overestimate, we suddenly have a number of 136 | empty entries). 137 | 138 | For that use case we can use `newSeqOfCap`. It creates a sequence of length 0, but whose capacity is the 139 | given number: 140 | """ 141 | nbCodeInBlock: 142 | var x = newSeqOfCap[int](100) 143 | echo "Length: ", x.len 144 | nbText: """ 145 | As we can see, the sequence is currently empty. But if we add to it, the sequence won't have to 146 | reallocate several times. In this way we can often get away with at most one reallocation, or 147 | zero if we accept a bit of overallocation. 148 | """ 149 | nbCodeInBlock: 150 | var x = newSeqOfCap[int](100) 151 | for i in 0 ..< 100: 152 | x.add i 153 | nbText: """ 154 | So this operation won't reallocate. 155 | 156 | Note: for even more performance critical code there is also `newSeqUninitialized`, which creates a 157 | sequence of N elements that are *not* zero initialized, to save one more (possibly useless) loop 158 | over the memory. 159 | 160 | ### Filling a seq with a fixed value 161 | 162 | Sometimes we wish to create a sequence that is initialized not to zero, but to some other constant 163 | value. For this we can use `newSeqWith` from `sequtils`: 164 | """ 165 | nbCode: 166 | import sequtils 167 | echo newSeqWith(3, 5.5) 168 | nbText: """ 169 | which takes as the first argument the size of the resulting sequence and as the second argument the 170 | value to initialize all elements to. 171 | 172 | Note: this can also be used to create nested sequences: 173 | """ 174 | nbCode: 175 | echo newSeqWith(3, newSeqWith(3, 5)) 176 | nbText: """ 177 | which gives us a nested sequence of `seq[seq[int]]` where each element is a sequence of 178 | integers with value 5.
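The fact that `newSeqWith` builds a fresh inner sequence for every element matters: the naive Python equivalent of a nested fill shows the aliasing bug you would otherwise get (a comparison sketch):

```python
# Buggy: the inner list is created once and then referenced three times.
shared = [[5] * 3] * 3
shared[0][0] = 99

# Correct: build a fresh inner list per row, which is what
# newSeqWith(3, newSeqWith(3, 5)) does in Nim.
fresh = [[5] * 3 for _ in range(3)]
fresh[0][0] = 99

print(shared)  # [[99, 5, 5], [99, 5, 5], [99, 5, 5]]
print(fresh)   # [[99, 5, 5], [5, 5, 5], [5, 5, 5]]
```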
179 | 180 | ### A few more typical ways to create sequences 181 | 182 | To finish off this section, let's look at a few more sequence constructors that are often useful. 183 | 184 | Nim supports slices using the syntax `a .. b`, which includes all values from `a` up to and including `b`. 185 | Together with `toSeq` it can be used to generate a sequence: 186 | """ 187 | nbCode: 188 | echo toSeq(10 .. 14) 189 | nbText: """ 190 | This essentially takes the role of `arange` from numpy. Of course this only generates sequences of integers. 191 | 192 | For succinctness (but not performance) we can convert such a sequence using `mapIt` to map 193 | each element from an input type to some other type: 194 | """ 195 | nbCode: 196 | echo toSeq(10 .. 14).mapIt(it.float) 197 | nbText: """ 198 | which returns a sequence of floats instead. 199 | 200 | Similarly, it is often desirable to get a linearly spaced sequence of numbers. `numericalnim` also 201 | provides a `linspace` implementation. Let's create 5 evenly spaced points between 1 and 2: 202 | """ 203 | nbCode: 204 | import numericalnim 205 | echo linspace(1.0, 2.0, 5) 206 | nbText: """ 207 | Finally, one may need a sequence of randomly sampled numbers. The `random` module of the Nim 208 | standard library provides a `rand` procedure we can combine with `mapIt`: 209 | """ 210 | nbCode: 211 | import random 212 | randomize() 213 | echo toSeq(0 ..< 5).mapIt(rand(10.0)) 214 | nbText: """ 215 | samples 5 floating point numbers between 0 and 10. 216 | 217 | ## `Tensor[T]` - an ND-array type from [Arraymancer](https://github.com/mratsim/Arraymancer) 218 | 219 | Arraymancer provides an ND-array type called `Tensor[T]` that is best compared to a numpy 220 | ndarray. Like a sequence `seq[T]`, it can only contain a single type. In contrast to it, 221 | however, it cannot be resized easily (only *reshaped*). 222 | 223 | Under the hood the data is stored as a pointer + length pair for types that can be copied 224 | using `copyMem` (Nim's `memcpy`).
Otherwise it contains a `seq[T]` for the data. The major 225 | difference between a sequence and a tensor is the ability to handle multidimensional data 226 | efficiently. 227 | 228 | In the case of a `seq[T]` we either have to manually handle the indexing of the sequence (if we 229 | store ND data in a 1D sequence) or deal with the inefficiencies of a nested sequence `seq[seq[T]]`. 230 | In that case *every* access requires an additional pointer indirection.""" 231 | nbCodeInBlock: 232 | let x = @[ @[1, 2, 3], @[4, 5, 6] ] 233 | echo x[1][0] 234 | nbText: """ 235 | The access `[1][0]` in this example first returns a sequence, which we have to dereference 236 | *again* to get to an element. This makes accessing data expensive. 237 | An Arraymancer tensor on the other hand always stores data in a one-dimensional data storage. 238 | Not only does it make iterating over and accessing data faster, it also allows for essentially 239 | free reshaping of the data, because the shape is only a piece of meta data. 240 | 241 | Another important bit of information is that tensors have reference semantics. That means 242 | assigning a tensor to a new variable and modifying that variable also modifies the initial 243 | tensor! This is for efficiency reasons, to not copy all the data for each assignment. 244 | 245 | The two most basic ways to create one are shown below: 246 | """ 247 | nbCode: 248 | import arraymancer 249 | let t = @[1.0, 2.0, 3.0].toTensor 250 | nbText: """ 251 | First we can just create a tensor from a (possibly nested) sequence or array using `toTensor`. 252 | 253 | Secondly: 254 | """ 255 | nbCodeInBlock: 256 | let t = newTensor[float](9) 257 | nbText: """ 258 | This is the default tensor constructor. It creates a tensor of type `Tensor[float]` with 259 | 9 elements that is zero initialized. If multiple arguments are given to the procedure, a tensor 260 | of different shape is created.
261 | """ 262 | nbCodeInBlock: 263 | let t = newTensor[float](3, 3) 264 | nbText: """ 265 | creates a 2-dimensional tensor of size 3 in both dimensions (essentially a 3x3 matrix). 266 | 267 | Note that due to the shape being a piece of meta data, it is cheap to convert from one shape 268 | to another using `reshape`. 269 | """ 270 | nbCodeInBlock: 271 | let t = newTensor[float](9).reshape(3, 3) 272 | nbText: """ 273 | This essentially does not have any meaningful overhead over the direct creation of the 3x3 tensor above. 274 | 275 | Some more ways to construct a tensor: 276 | """ 277 | nbCodeInBlock: 278 | let t1 = zeros[float](9) ## a tensor that is explicitly 0, the default 279 | let t2 = ones[float](9) ## a tensor that is initialized to 1 280 | let t3 = newTensorWith[float]([3, 3], 5.0) ## a 3x3 tensor initialized to 5 281 | let t4 = newTensorUninit[float](10) ## a tensor that is *not* initialized 282 | let t5 = arange(0, 10) ## the range 0 up to (excluding) 10 as a `Tensor[int]` 283 | let t6 = linspace(0.0, 10.0, 1000) ## 1000 linearly spaced points between 0 and 10 284 | nbText: """ 285 | These are only a few common ways to create a tensor. 286 | 287 | ### Access and mutation 288 | 289 | Arraymancer tensors are very similar to the Nim standard library `seq[T]` in terms of 290 | their element access and element mutation, with the aforementioned difference of reference 291 | semantics. 292 | 293 | However, because tensors deal with possibly multidimensional data, there are ways to 294 | slice and select parts of a tensor using syntax comparable to numpy's fancy indexing. 295 | Furthermore, element-wise operations between multiple tensors are supported. 296 | 297 | As we won't make use of that in this tutorial, we won't cover it here. See the Arraymancer 298 | [tutorial section](https://mratsim.github.io/Arraymancer/tuto.slicing.html) to get an idea.
299 | 300 | ### More 301 | 302 | Of course Arraymancer provides a large amount of additional functionality, starting from 303 | linear algebra, to statistics, machine learning and more. View the full documentation here: 304 | 305 | [Arraymancer documentation](https://mratsim.github.io/Arraymancer/) 306 | """ 307 | nbSave 308 | -------------------------------------------------------------------------------- /book/external_language_integration/julia/nimjl_arrays.nim: -------------------------------------------------------------------------------- 1 | import nimib, nimibook 2 | 3 | nbInit(theme = useNimibook) 4 | 5 | nbText: """## Julia Arrays 6 | 7 | Julia Arrays are one of the best NDArray data structures available. That's why special emphasis is placed on handling Julia Arrays. 8 | 9 | nimjl defines the generic type ``JlArray[T]``. A JlArray is a special JlValue that represents the type ``Array{T}`` in Julia. It's generic, so Nim has the information of the underlying type and it's possible to access its buffer and iterate over it. 10 | 11 | The closest Nim equivalent would be the [Arraymancer](https://github.com/mratsim/Arraymancer) Tensor type. 12 | 13 | Just keep in mind, **Julia's Arrays are column-major**, while Nim usually follows C's convention of row-major. 14 | 15 | This is important because you may end up having confusing results if you don't take it into account. 16 | 17 | ## Creating Arrays 18 | 19 | Array creation can be done in multiple different ways. 20 | 21 | ### Native constructor 22 | 23 | The most "natural" way of creating a ``JlArray[T]`` is by calling a Julia function that returns an Array.
24 | 25 | It is important to note that in this case the memory is allocated and owned by Julia, and the JlValue needs to be gc-rooted in order to be used between calls (more on that later): 26 | """ 27 | 28 | nbCode: 29 | import nimjl 30 | Julia.init() 31 | 32 | nbCodeInBlock: 33 | # Use a Julia constructor to create a 5x5 Matrix of Float 34 | var localArray = Julia.zeros(5, 5) 35 | # localArray memory is owned by Julia 36 | echo localArray 37 | 38 | nbText:"""### Construct from existing buffer 39 | 40 | When a ``JlArray[T]`` has to be constructed from existing values - i.e. an existing Nim buffer - the easiest way is to either copy the buffer into a ``JlArray[T]`` OR have the array point to the buffer. 41 | 42 | #### Copying an existing buffer 43 | 44 | By copying an existing buffer / Tensor / seq - memory is allocated and owned by Julia during the copy; the JlValue needs to be gc-rooted in order to be used between calls: 45 | """ 46 | 47 | nbCode: 48 | import std/sequtils 49 | 50 | nbCodeInBlock: 51 | var localNimArray = newSeqWith(5, newSeq[float](5)) 52 | var localArray = toJlArray(localNimArray) 53 | # localArray memory is owned by Julia 54 | echo localArray 55 | 56 | 57 | nbText:""" 58 | pros: 59 | * Julia owning the memory makes it more robust. 60 | 61 | cons: 62 | * If you need to go from Nim to Julia to Nim, you have to perform multiple copies 63 | 64 | #### Using an existing buffer 65 | 66 | * By using an existing buffer (or Tensor) - no memory allocation is performed and Julia does not own the memory. The memory has to be contiguous: 67 | """ 68 | nbCodeInBlock: 69 | var localNimArray = newSeq[float](25) # Create a Nim buffer of contiguous memory 70 | var localArray = jlArrayFromBuffer(localNimArray).reshape(5, 5) 71 | echo localArray 72 | localNimArray[0] = 14 73 | # localArray memory is NOT owned by Julia 74 | # As you can see, modifying the buffer modifies the Julia Array. 75 | # Keep in mind when using buffers directly that Julia Arrays are column-major.
76 |   echo localArray
77 |
78 | nbText:"""As you can see in the previous example, modifying the original sequence modifies the ``JlArray[T]``.
79 |
80 | pros:
81 | * No copy is performed; you may use a JlArray[T] as a view of the Nim buffer at no cost.
82 |
83 | cons:
84 | * If the Nim buffer is free'd while the ``JlArray[T]`` is still in use, it will cause a dangling pointer.
85 | * Julia Arrays are column-major while Nim usually uses the row-major convention. This means you have to be careful to iterate over the Array in memory order (or lose performance).
86 |
87 |
88 | ### Julia GC & rooting values
89 |
90 | When using a JlArray whose memory is handled by the Julia VM, you need to gc-root the Array in the Julia VM so it doesn't get collected by Julia's GC over successive calls.
91 |
92 | This is done by using the ``jlGcRoot`` template, which calls the C macro ``JL_GC_PUSH`` with its arguments and then calls the C macro ``JL_GC_POP()`` at the end of the template's scope.
93 |
94 | For a more detailed explanation regarding ``JL_GC_PUSH()`` / ``JL_GC_POP``, please refer to Julia's official documentation on [embedding](https://docs.julialang.org/en/v1/manual/embedding/#Memory-Management).
95 | """
96 |
97 | nbCodeInBlock:
98 |   # Use a Julia constructor to create a 5x5 Matrix of Float
99 |   var localArray = Julia.zeros(5, 5)
100 |   jlGcRoot(localArray):
101 |     # localArray is gc-rooted as long as you're in the ``jlGcRoot`` template scope
102 |     echo localArray
103 |     # Do more stuff... localArray will not be collected by Julia's GC
104 |     echo localArray
105 |   # localArray "rooting" ends here
106 |
107 | nbText: """
108 | Note that if Julia does **not** own the memory, then calling ``jlGcRoot`` on the value is forbidden (and will probably result in a segfault): Julia's GC must not track memory it does not own.
109 | """
110 |
111 | nbText:"""
112 |
113 | ### Indexing
114 |
115 | ``JlArray[T]`` can be indexed in native Nim; through the power of macros, the ``[]`` and ``[]=`` operators are mapped to Julia's [getindex](https://docs.julialang.org/en/v1/base/arrays/#Base.getindex-Tuple{Type,%20Vararg{Any,%20N}%20where%20N}) and [setindex!](https://docs.julialang.org/en/v1/base/arrays/#Base.setindex!-Tuple{AbstractArray,%20Any,%20Vararg{Any,%20N}%20where%20N}).
116 |
117 | Some examples:
118 | """
119 |
120 | nbCodeInBlock:
121 |   var localArray = @[
122 |     @[1, 2, 3, 4],
123 |     @[5, 6, 7, 8]
124 |   ].toJlArray()
125 |
126 |   echo localArray.shape()
127 |   echo localArray
128 |   let
129 |     e11 = localArray[1, 1]
130 |     e12 = localArray[1, 2]
131 |     e21 = localArray[2, 1]
132 |     e22 = localArray[2, 2]
133 |
134 |   echo "e11=", e11, " e12=", e12, " e21=", e21, " e22=", e22
135 |   echo typeof(e11)
136 |   echo jltypeof(e11)
137 |
138 | nbText: """Several things to notice here:
139 | * calling ``toJlArray()`` performs a copy and re-orders the elements into column-major order, so the ``JlArray[T]`` is of shape [2, 4].
140 | * Indexing starts at 1, following Julia's indexing rules.
141 | * When indexing a single element, the result is represented as a ``JlArray[T]`` in Nim, but is actually a scalar in Julia.
142 |
143 | Let's see a few more examples.
144 |
145 | Select a single index on the first axis; select all indexes on the second axis:
146 | """
147 |
148 | nbCodeInBlock:
149 |   var localArray = @[
150 |     @[1, 2, 3, 4],
151 |     @[5, 6, 7, 8]
152 |   ].toJlArray()
153 |
154 |   let e10 = localArray[1, _]
155 |   echo e10
156 |   echo e10.shape
157 |   echo typeof(e10)
158 |   echo jltypeof(e10)
159 |
160 | nbText: """Select a single index on the first axis; select the indexes >= 2 and <= 4 on the second axis:
161 | """
162 |
163 | nbCodeInBlock:
164 |   var localArray = @[
165 |     @[1, 2, 3, 4],
166 |     @[5, 6, 7, 8]
167 |   ].toJlArray()
168 |
169 |   let e1 = localArray[2, 2..4]
170 |   echo e1
171 |   echo e1.shape
172 |   echo typeof(e1)
173 |   echo jltypeof(e1)
174 |
175 |
176 | nbText: """To exclude the last value, the syntax is simply ..<:
177 | """
178 |
179 | nbCodeInBlock:
180 |   var localArray = @[
181 |     @[1, 2, 3, 4],
182 |     @[5, 6, 7, 8]
183 |   ].toJlArray()
184 |
185 |   let e1inf = localArray[2, 2..<4]
186 |   echo e1inf
187 |   echo e1inf.shape
188 |   echo typeof(e1inf)
189 |   echo jltypeof(e1inf)
190 |
191 | nbText:"""
192 | Select a single index on the first axis; select all indexes between the second element and the second-to-last element on the second axis:
193 | """
194 |
195 | nbCodeInBlock:
196 |   var localArray = @[
197 |     @[1, 2, 3, 4],
198 |     @[5, 6, 7, 8]
199 |   ].toJlArray()
200 |
201 |   let e1hat2 = localArray[2, 2..^2]
202 |   echo e1hat2
203 |   echo e1hat2.shape
204 |   echo typeof(e1hat2)
205 |   echo jltypeof(e1hat2)
206 |
207 | nbText:"""Note that the slicing syntax is based on Arraymancer's slicing syntax, but respects Julia's indexing convention.
208 |
209 | ## Conversion between JlArray[T] and Arraymancer's Tensor[T] (and dealing with RowMajor/ColMajor)
210 |
211 | Working with an Arraymancer Tensor isn't that different from working with an Array at first glance; the major difference is that a Tensor can be either column-major or row-major, so when creating a JlArray by copying a Tensor, the data will be converted to column-major order before copying.
212 |
213 | """
214 |
215 | nbCode:
216 |   import arraymancer
217 |
218 | nbCodeInBlock:
219 |   var localTensor = newTensor[int64](3, 5)
220 |   var i = 0
221 |   localTensor.apply_inline:
222 |     inc(i)
223 |     i
224 |   echo localTensor
225 |
226 |   var localArray = localTensor.toJlArray()
227 |   echo localArray
228 |
229 | nbText: """Despite localTensor being row-major by default, the JlArray (which is column-major by default) still has identical values.
230 |
231 | This only applies when a copy is performed:
232 | """
233 |
234 | nbCodeInBlock:
235 |   var localTensor = newTensor[int64](3, 5)
236 |   var i = 0
237 |   localTensor.apply_inline:
238 |     inc(i)
239 |     i
240 |   echo localTensor
241 |
242 |   var localArray = jlArrayFromBuffer(localTensor)
243 |   echo localArray
244 |
245 | nbText: """When working from the raw buffer of the Tensor, the row-major data is interpreted in column-major order, so the ``JlArray[T]`` values are different from the previous example.
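
To make the reinterpretation concrete, here is the assumed index mapping for the 3x5 example above, where the shared buffer `b` holds `1, 2, ..., 15` filled row by row by Nim (a sketch of the layout rules, not nimjl API):

```nim
# Nim Tensor (row-major, 0-based):     T[i, j] reads b[i * 5 + j]
# Julia Array (column-major, 1-based): A[i, j] reads b[(j - 1) * 3 + i]
# Hence T[0, 1] == 2, while A[1, 2] == 4 (the 4th element of b, 1-based).
```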
246 |
247 | To convert a ``JlArray[T]`` to a ``Tensor[T]``, simply use the ``to`` proc as you would with any other type, with just an additional argument to specify the memory layout of the Tensor created this way:
248 | """
249 |
250 | nbCodeInBlock:
251 |   var localArray = Julia.rand([1, 2, 3, 4, 5], (5, 5)).toJlArray[:int]()
252 |   var localTensor = localArray.to(Tensor[int], colMajor)
253 |
254 |   echo localArray
255 |   echo localTensor
256 |
257 |   var localTensor2 = localArray.to(Tensor[int], rowMajor)
258 |   assert(localTensor == localTensor2)
259 |
260 | nbText:"""Both Tensors have identical indexed values, but the buffers are different according to the memory layout argument.
261 |
262 | When passing a Tensor directly as a value in a ``jlCall`` / ``Julia.`` expression, a ``JlArray[T]`` will be constructed from its buffer, so you should be aware of the memory layout of the buffer.
263 |
264 | """
265 |
266 | nbCode:
267 |   var orderedTensor = newTensor[int]([3, 2])
268 |   var idx = 0
269 |   orderedTensor.apply_inline:
270 |     inc(idx)
271 |     idx
272 |   echo orderedTensor
273 |
274 | nbText: """Let's use the simple Tensor above as an example with a trivial function such as ``transpose`` and compare the results.
275 |
276 | Case 1: Using the Tensor argument directly (no copy):
277 | """
278 |
279 | nbCodeInBlock:
280 |   var res = Julia.transpose(orderedTensor).toJlArray(int)
281 |   echo res
282 |   echo orderedTensor.transpose()
283 |
284 | nbText:"""This is expanded to:
285 | """
286 |
287 | nbCodeInBlock:
288 |   # When passing localTensor, a ``JlArray`` is created using ``jlFromBuffer``.
289 |   # Since the Tensor is row-major and the Array column-major, the order of the values is not conserved
290 |   var res = Julia.transpose(toJlVal(jlArrayFromBuffer(orderedTensor))).toJlArray(int)
291 |   echo res
292 |   echo orderedTensor.transpose()
293 |
294 | nbText:"""Therefore, no copy is made: the Julia Array points to the Tensor's buffer.
295 |
296 | The indexed values between ``Julia.transpose(...)`` and ``orderedTensor.transpose()`` **are different** because they are indexed differently: Julia Arrays are indexed in column-major order while this Arraymancer Tensor is row-major.
297 |
298 | Case 2: Copying the Tensor into an Array and using the Array:
299 | """
300 | nbCodeInBlock:
301 |   var tensorCopied = toJlArray(orderedTensor)
302 |   # The Tensor is copied to the Array in column-major order
303 |   var res = Julia.transpose(tensorCopied).toJlArray(int)
304 |   echo res
305 |   echo orderedTensor.transpose()
306 |
307 | nbText:"""On the other hand, in this case, because the Array has been created from **a copy**, the values have been copied into the ``JlArray`` in column-major order.
308 |
309 | As a consequence, the indexed values of ``Julia.transpose()`` and ``orderedTensor.transpose()`` **are identical**.
310 |
311 | Note that you can use ``swapMemoryOrder`` on an existing ``JlArray[T]`` to obtain a permuted copy of the Array.
312 | """
313 |
314 | nbCodeInBlock:
315 |   var tensorView = jlArrayFromBuffer(orderedTensor)
316 |   var tensorCopied = toJlArray(orderedTensor)
317 |   echo tensorView
318 |   echo tensorCopied
319 |
320 | nbText: """The arrays are actually different from Julia's point of view: ``tensorView`` holds row-major values (the Tensor buffer) indexed as column-major, while ``tensorCopied`` holds column-major values indexed as column-major.
321 |
322 | In Nim, the utility proc ``swapMemoryOrder()`` will **return a copy** with a swapped memory order (column-major -> row-major & vice versa) to handle such cases more easily.
323 |
324 | ## Broadcasting
325 |
326 | One of the main appeals of Arrays in Julia is the ability to broadcast a function over every element.
327 | In Nimjl this is done using ``jlBroadcast``.
328 | """
329 |
330 | nbCodeInBlock:
331 |   var localArray = @[
332 |     @[4, 4, 4, 4],
333 |     @[4, 4, 4, 4],
334 |     @[4, 4, 4, 4]
335 |   ].toJlArray()
336 |   echo localArray
337 |   var sqrtLocalArray = jlBroadcast(sqrt, localArray).toJlArray(float) # sqrt of int is a float
338 |   echo sqrtLocalArray
339 |
340 | nbText: """This is the equivalent in Julia of calling ``sqrt.(localArray)``.
341 |
342 | For convenience, the usual broadcasted operators have also been defined:
343 | """
344 |
345 | nbCodeInBlock:
346 |   var localArray = @[
347 |     @[4, 4, 4, 4],
348 |     @[4, 4, 4, 4],
349 |     @[4, 4, 4, 4]
350 |   ].toJlArray()
351 |   echo localArray
352 |   var res = (localArray +. localArray)*.2 -. (localArray/.2)
353 |   echo res
354 |
355 | nbText: """## Final words
356 | Thanks for reading this far! I hope that this tutorial will help you get started mixing Julia and Nim in your application.
357 |
358 | If you find a bug in [nimjl](https://github.com/Clonkk/nimjl), opening an issue will be much appreciated.
359 | Got a question? Contact the SciNim team behind this [getting started](https://github.com/scinim/getting-started) book, either by opening an issue or through the science channel on the Nim Discord/Matrix.
360 |
361 | """
362 |
363 | nbSave
364 |
--------------------------------------------------------------------------------
/book/numerical_methods/integration1d.nim:
--------------------------------------------------------------------------------
1 | import nimib except Value
2 | import nimibook
3 |
4 | nbInit(theme = useNimibook)
5 | nb.useLatex
6 |
7 | nbText: md"""
8 | # 1D Numerical Integration
9 | In this tutorial you will learn how to use [numericalnim](https://github.com/HugoGranstrom/numericalnim/) to perform
10 | numerical integration both on discrete data and continuous functions.
11 |
12 | ## Integrate Continuous Functions
13 | We will start off by integrating a continuous function using a variety of methods and comparing their accuracy and performance
14 | so that you can make an educated choice of method. Let's begin by creating the data to integrate; I have chosen to use
15 | the *humps* function from MATLAB's demos:
16 |
17 | $$ f(x) = \frac{1}{(x - 0.3)^2 + 0.01} + \frac{1}{(x - 0.9)^2 + 0.04} - 6 $$
18 |
19 | It has the primitive function:
20 |
21 | $$ F(x) = 10 \arctan(10x - 3) + 5 \arctan\left(5x - \frac{9}{2}\right) - 6x $$
22 |
23 | Let's code them!
24 | """
25 |
26 | nbCode:
27 |   import math, sequtils
28 |   import numericalnim, ggplotnim, benchy
29 |   proc f(x: float, ctx: NumContext[float, float]): float =
30 |     result = 1 / ((x - 0.3)^2 + 0.01) + 1 / ((x - 0.9)^2 + 0.04) - 6
31 |
32 |   proc F(x: float): float =
33 |     result = 10*arctan(10*x-3) + 5*arctan(5*x - 9/2) - 6*x
34 |
35 | block continuousPart: # Want to be able to use cross-codeblock variables, but also separate each part of the tutorial.
36 |
37 | nbText: md"""
38 | As you can see, we defined `f` not as just `proc f(x: float): float` but added a `ctx: NumContext[float, float]` as well.
39 | That is because `numericalnim`'s integration methods expect a proc with the signature ``proc f[T; U](x: U, ctx: NumContext[T, U]): T``,
40 | where `U` is the type used for computations internally (typically a `float` or a type like it) and `T` is the user-defined
41 | type that is the result of the function to be integrated (and may be used to include custom data in the body of the function).
42 |
43 | This means that you can both integrate functions *returning* types other than `float` (any `T` that provides a certain
44 | set of supported operations), and perform the integration using any type `U`, as long as it behaves "float-like".
45 |
46 | We won't be integrating `F` (it is the indefinite integral already) so I skipped adding `ctx` there for simplicity.
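
To make the expected signature concrete, here is a minimal sketch of another conforming integrand (a hypothetical `g`, not part of this tutorial's code, instantiated with `T = U = float` just like `f` above):

```nim
import numericalnim

# Any proc matching ``proc f[T; U](x: U, ctx: NumContext[T, U]): T`` can be
# handed to numericalnim's integrators; here both T and U are float.
proc g(x: float, ctx: NumContext[float, float]): float =
  result = x * x  # integrand x^2; its exact integral over [0, 1] is 1/3
```

Calling, say, `adaptiveGauss(g, 0.0, 1.0)` should then return a value close to `1/3`.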
47 |
48 | Aren't you curious about what `f(x)` looks like? Thought so! Let's plot it using `ggplotnim`;
49 | a more detailed plotting tutorial can be found [here](../data_viz/plotting_data.html).
50 | """
51 |
52 | nbCodeInBlock:
53 |   let ctxPlot = newNumContext[float, float]()
54 |   let xPlot = numericalnim.linspace(0, 1, 1000)
55 |   let yPlot = xPlot.mapIt(f(it, ctxPlot))
56 |
57 |   let dfPlot = toDf(xPlot, yPlot)
58 |   ggplot(dfPlot, aes("xPlot", "yPlot")) +
59 |     geom_line() +
60 |     ggsave("images/humps.png")
61 |
62 | nbImage("images/humps.png")
63 |
64 | nbText: md"""
65 | ### Let the integration begin!
66 | Now we have everything we need to start integrating. The specific integral we want to compute is:
67 |
68 | $$ \int_0^1 f(x)\, \mathrm{d}x $$
69 |
70 | The methods we will use are: `trapz`([link](https://en.wikipedia.org/wiki/Trapezoidal_rule)), `simpson`([link](https://en.wikipedia.org/wiki/Simpson%27s_rule)),
71 | `gaussQuad`([link](https://en.wikipedia.org/wiki/Gaussian_quadrature)), `romberg`([link](https://en.wikipedia.org/wiki/Romberg%27s_method)), `adaptiveSimpson` and `adaptiveGauss`,
72 | where the last three are adaptive methods and the others are fixed-step methods. We will use a tolerance `tol=1e-6`
73 | for the adaptive methods and `N=100` intervals for the fixed-step methods.
74 | Let's code this now and compare them!
75 | """
76 |
77 | nbCode:
78 |   let a = 0.0
79 |   let b = 1.0
80 |   let tol = 1e-6
81 |   let N = 100
82 |   let exactIntegral = F(b) - F(a)
83 |
84 |   let trapzError = abs(trapz(f, a, b, N) - exactIntegral)
85 |   let simpsonError = abs(simpson(f, a, b, N) - exactIntegral)
86 |   let gaussQuadError = abs(gaussQuad(f, a, b, N) - exactIntegral)
87 |   let rombergError = abs(romberg(f, a, b, tol=tol) - exactIntegral)
88 |   let adaptiveSimpsonError = abs(adaptiveSimpson(f, a, b, tol=tol) - exactIntegral)
89 |   let adaptiveGaussError = abs(adaptiveGauss(f, a, b, tol=tol) - exactIntegral)
90 |
91 |   echo "Trapz Error: ", trapzError
92 |   echo "Simpson Error: ", simpsonError
93 |   echo "GaussQuad Error: ", gaussQuadError
94 |   echo "Romberg Error: ", rombergError
95 |   echo "AdaSimpson Error: ", adaptiveSimpsonError
96 |   echo "AdaGauss Error: ", adaptiveGaussError
97 |
98 | nbText: md"""
99 | It seems like the Gauss methods were the most accurate, with Romberg and Simpson
100 | coming afterwards and trapz in last place. But at what cost did this accuracy come? Which method was the fastest?
101 | Let's find out with a package called `benchy`. `keep` is used to prevent the compiler from optimizing away the code:
102 | """
103 |
104 | nbCode:
105 |
106 |   timeIt "Trapz":
107 |     keep trapz(f, a, b, N)
108 |
109 |   timeIt "Simpson":
110 |     keep simpson(f, a, b, N)
111 |
112 |   timeIt "GaussQuad":
113 |     keep gaussQuad(f, a, b, N)
114 |
115 |   timeIt "Romberg":
116 |     keep romberg(f, a, b, tol=tol)
117 |
118 |   timeIt "AdaSimpson":
119 |     keep adaptiveSimpson(f, a, b, tol=tol)
120 |
121 |   timeIt "AdaGauss":
122 |     keep adaptiveGauss(f, a, b, tol=tol)
123 |
124 | nbText: md"""
125 | As we can see, all methods except AdaSimpson were roughly equally fast. So if I were to choose
126 | a winner, it would be `adaptiveGauss` because it was the most accurate while still being among
127 | the fastest methods.
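
One practical note before moving on: for the adaptive methods, `tol` is the knob that trades runtime for accuracy. A hedged sketch, reusing `f`, `a`, `b` and `exactIntegral` from above (the exact error you get may differ):

```nim
# Tightening the tolerance should shrink the error, at the cost of more
# function evaluations (and therefore a longer runtime).
let tightError = abs(adaptiveGauss(f, a, b, tol = 1e-10) - exactIntegral)
echo "AdaGauss (tol=1e-10) Error: ", tightError
```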
128 |
129 | ### Cumulative Integration
130 | There is one more type of integration one can do, namely cumulative integration. This is for the case
131 | when you don't just want to calculate the total integral, but want an approximation of `F(x)`, so we
132 | need the integral evaluated at multiple points. An example is if we have the acceleration `a(t)` as a function.
133 | If we integrate it we get the velocity, but to be able to integrate the velocity (to get the distance)
134 | we need it as a function, not a single value. That is where cumulative integration comes in!
135 |
136 | The methods available to us from `numericalnim` are: `cumtrapz`, `cumsimpson`, `cumGauss` and `cumGaussSpline`.
137 | All methods except `cumGaussSpline` return the cumulative integral as a `seq[T]`; `cumGaussSpline` instead returns a
138 | Hermite spline. We will both be calculating the errors and visualizing the different approximations of `F(x)`.
139 | Let's get coding!
140 | """
141 |
142 | nbCodeInBlock:
143 |   let a = 0.0
144 |   let b = 1.0
145 |   let tol = 1e-6
146 |   let N = 100
147 |   let dx = (b - a) / N.toFloat
148 |
149 |   let x = numericalnim.linspace(a, b, 100)
150 |   var exact = x.mapIt(F(it) - F(a))
151 |
152 |   let yTrapz = cumtrapz(f, x, dx=dx)
153 |   let ySimpson = cumsimpson(f, x, dx=dx)
154 |   let yGauss = cumGauss(f, x, tol=tol, initialPoints=x)
155 |
156 |   echo "Trapz Error: ", sum(abs(exact.toTensor - yTrapz.toTensor))
157 |   echo "Simpson Error: ", sum(abs(exact.toTensor - ySimpson.toTensor))
158 |   echo "Gauss Error: ", sum(abs(exact.toTensor - yGauss.toTensor))
159 |
160 |   let df = toDf(x, exact, yTrapz, ySimpson, yGauss)
161 |   # Rewrite df in long format for plotting
162 |   let dfLong = df.gather(["exact", "yTrapz", "ySimpson", "yGauss"], key="Method", value="y")
163 |   ggplot(dfLong, aes("x", "y", color="Method")) +
164 |     geom_line() +
165 |     ggsave("images/continuousHumpsComparaision.png")
166 |
167 | nbImage("images/continuousHumpsComparaision.png")
168 |
169 | nbText: md"""
170 | As we can see in the graph they are all so close to the exact curve that you can't distinguish between
171 | them, but when we look at the total error we see that once again the Gauss method is superior.
172 |
173 | > #### Note:
174 | > When we called `cumGauss` we passed in a parameter `initialPoints` as well. The reason for that
175 | is that the Gauss method uses polynomials of degree 21 in its internal calculations, while the
176 | final interpolation at the x-values in `x` is performed using a 3rd-degree polynomial. This means that Gauss
177 | internally might need only very few points because of its high degree, but then we get too few
178 | points for the final 3rd-degree interpolation. So to make sure we have enough points in the end, we supply
179 | enough initial points for it to make good predictions even if it doesn't
180 | split any additional intervals. By default it uses 100 equally spaced points, so unless you know you
181 | need far more or fewer points you should be good.
182 | This is especially important when using `cumGaussSpline` as we need enough
183 | points to construct an accurate spline.
184 |
185 | The last method is `cumGaussSpline`, which is identical to `cumGauss` except that it constructs a Hermite spline
186 | from the returned values, which can be evaluated when needed.
187 | """
188 |
189 | nbCodeInBlock:
190 |   let a = 0.0
191 |   let b = 1.0
192 |   let tol = 1e-6
193 |
194 |   let spline = cumGaussSpline(f, a, b, tol=tol)
195 |
196 |   echo "One point: ", spline.eval(0.0) # evaluate it in a single point
197 |   echo "Three points: ", spline.eval(@[0.0, 0.5, 1.0]) # or multiple at once
198 |
199 |   # Thanks to a converter you can integrate a spline by passing it as the function:
200 |   echo "Integrate it again: ", adaptiveGauss(spline, a, b)
201 |
202 | nbText: md"""
203 | I think that about wraps it up regarding integrating continuous functions!
Let's take a look at 204 | integrating discrete data now! 205 | 206 | ## Integrate Discrete Data 207 | 208 | Discrete data is a different beast than continuous functions as we have limited data. Therefore, the choice 209 | of integration method is even more important as we can't exchange performance to get more accurate results 210 | like we can with continuous functions (we can increase the number of intervals for example). So we want to make the 211 | most out of the data we have, and any knowledge we have about the nature of the data is helpful. 212 | For example if we know the data isn't smooth (discontinuities), then `trapz` could be a better choice 213 | than let's say `simpson`, because `simpson` assumes the data is smooth. 214 | 215 | Let's sample `f(x)` from above at let's say 9 points and plot how much information we lose by 216 | plotting the sampled points, a Hermite Spline interpolation of them and the original function: 217 | """ 218 | 219 | block discretePart: 220 | nbCode: 221 | var xSample = numericalnim.linspace(0.0, 1.0, 9) 222 | var ySample = xSample.mapIt(f(it, nil)) # nil can be passed in instead of ctx if we don't use it 223 | 224 | let xDense = numericalnim.linspace(0, 1, 1000) # "continuous" x 225 | let yDense = xDense.mapIt(f(it, nil)) 226 | 227 | var sampledSpline = newHermiteSpline(xSample, ySample) 228 | var ySpline = sampledSpline.eval(xDense) 229 | 230 | var dfSample = toDf(xSample, ySample, xDense, yDense, ySpline) 231 | ggplot(dfSample) + 232 | #geom_point(data = dfSample.filter(f{Value -> bool: not `xSample`.isNull.toBool}), aes = aes("xSample", "ySample", color = "Sampled")) + 233 | geom_point(aes("xSample", "ySample", color="Sampled")) + 234 | geom_line(aes("xDense", "ySpline", color="Sampled")) + 235 | geom_line(aes("xDense", "yDense", color="Dense")) + 236 | scale_x_continuous() + scale_y_continuous() + 237 | ggsave("images/sampledHumps.png") 238 | 239 | nbImage("images/sampledHumps.png") 240 | 241 | nbText: md""" 242 | As you 
can see, the resolution was too small to fully account for the big peak and undershoots it by quite a margin.
243 | Had we not known the "real" function we wouldn't have realized this, of course, and that is most often the
244 | case when working with discrete data. Therefore, the resolution of the data is crucial for the accuracy. But let's say this
245 | is all the data we have at our disposal and let's see how the different methods perform.
246 |
247 | The integration methods at our disposal are:
248 | - `trapz`: Works for any data.
249 | - `simpson`: Works for any data with 3 or more data points.
250 | - `romberg`: Works **only** for equally spaced points. The number of points must also be
251 |   of the form `2^n + 1` (e.g. 3, 5, 9, 17, 33, etc.).
252 |
253 | Luckily for us our data satisfies all of them ;) So let's get coding:
254 | """
255 |
256 | nbCode:
257 |   let exact = F(1) - F(0)
258 |
259 |   var trapzIntegral = trapz(ySample, xSample)
260 |   var simpsonIntegral = simpson(ySample, xSample)
261 |   var rombergIntegral = romberg(ySample, xSample)
262 |
263 |   echo "Exact: ", exact
264 |   echo "Trapz: ", trapzIntegral
265 |   echo "Simpson: ", simpsonIntegral
266 |   echo "Romberg: ", rombergIntegral
267 |   echo "Trapz Error: ", abs(trapzIntegral - exact)
268 |   echo "Simpson Error: ", abs(simpsonIntegral - exact)
269 |   echo "Romberg Error: ", abs(rombergIntegral - exact)
270 |
271 | nbText: md"""
272 | As expected all the methods underestimated the integral, but it might be unexpected that
273 | `trapz` performed the best out of them. Let's add a few more points, why not 33, and see if
274 | that changes things!
275 | """
276 |
277 | nbCode:
278 |   xSample = numericalnim.linspace(0.0, 1.0, 33)
279 |   ySample = xSample.mapIt(f(it, nil))
280 |
281 |   sampledSpline = newHermiteSpline(xSample, ySample)
282 |   ySpline = sampledSpline.eval(xDense)
283 |
284 |   dfSample = toDf(xSample, ySample, xDense, yDense, ySpline)
285 |   ggplot(dfSample) +
286 |     geom_point(aes("xSample", "ySample", color="Sampled")) +
287 |     geom_line(aes("xDense", "ySpline", color="Sampled")) +
288 |     geom_line(aes("xDense", "yDense", color="Dense")) +
289 |     scale_x_continuous() + scale_y_continuous() +
290 |     ggsave("images/sampledHumps33.png")
291 |
292 |   trapzIntegral = trapz(ySample, xSample)
293 |   simpsonIntegral = simpson(ySample, xSample)
294 |   rombergIntegral = romberg(ySample, xSample)
295 |
296 |   echo "Exact: ", exact
297 |   echo "Trapz: ", trapzIntegral
298 |   echo "Simpson: ", simpsonIntegral
299 |   echo "Romberg: ", rombergIntegral
300 |   echo "Trapz Error: ", abs(trapzIntegral - exact)
301 |   echo "Simpson Error: ", abs(simpsonIntegral - exact)
302 |   echo "Romberg Error: ", abs(rombergIntegral - exact)
303 |
304 | nbImage("images/sampledHumps33.png")
305 |
306 | nbText: md"""
307 | As expected all methods became more accurate when we increased the number of points.
308 | And from the graph we can see that the points capture the shape of the curve much better now.
309 | We can also note that `simpson` has overtaken `trapz` and `romberg` is neck-and-neck with `trapz` now.
310 | Experiment for yourself with different numbers of points, but asymptotically `romberg` will eventually
311 | beat `simpson` when enough points are used.
312 |
313 | The take-away from this very limited testing is that depending on the characteristics and quality
314 | of the data, different methods might give the most accurate answer. Which one is hard to tell in general,
315 | but `trapz` *might* be more robust for very sparse data as it doesn't "guess" as much as the others.
But once again,
316 | it entirely depends on the data, so make sure to understand your data!
317 |
318 | ### Cumulative Integration with Discrete Data
319 | Performing cumulative integration on discrete data works the same as for continuous functions. The only differences are that
320 | only `cumtrapz` and `cumsimpson` are available, and that you pass in `y` and `x` instead of `f`:
321 | """
322 |
323 | nbCodeInBlock:
324 |   let a = 0.0
325 |   let b = 1.0
326 |   let tol = 1e-6
327 |   let N = 100
328 |
329 |   let x = numericalnim.linspace(a, b, N)
330 |   let y = x.mapIt(f(it, nil))
331 |   var exact = x.mapIt(F(it) - F(a))
332 |
333 |   let yTrapz = cumtrapz(y, x)
334 |   let ySimpson = cumsimpson(y, x)
335 |
336 |   echo "Trapz Error: ", sum(abs(exact.toTensor - yTrapz.toTensor))
337 |   echo "Simpson Error: ", sum(abs(exact.toTensor - ySimpson.toTensor))
338 |
339 |   let df = toDf(x, exact, yTrapz, ySimpson)
340 |   # Rewrite df in long format for plotting
341 |   let dfLong = df.gather(["exact", "yTrapz", "ySimpson"], key="Method", value="y")
342 |   ggplot(dfLong, aes("x", "y", color="Method")) +
343 |     geom_line() +
344 |     ggsave("images/discreteHumpsComparaision.png")
345 |
346 | nbImage("images/discreteHumpsComparaision.png")
347 |
348 | nbSave()
349 |
--------------------------------------------------------------------------------
/book/overview/index.md:
--------------------------------------------------------------------------------
1 | # Overview of the scientific computing ecosystem
2 |
3 | This chapter aims to provide a rough overview of the ecosystem for scientific computing,
4 | presenting the available packages together with a short description of what they can be
5 | used for.
6 |
7 | In general, keep the [Nimble directory](https://nimble.directory/) handy to search for
8 | Nim packages from your browser.
9 |
10 | Note that due to the way this page is laid out, some packages might appear multiple times
11 | under different sections.
12 | 13 | Further: if you feel any existing library is missing on this page, *please* either create 14 | a PR to this page, open an issue or simply write a message in the Matrix/Discord Nim science 15 | channel! 16 | 17 | ## Fundamental data handling libraries 18 | 19 | The libraries listed here all provide basic data types that are 20 | helpful in general (multidimensional arrays & data frames). 21 | 22 | - [Arraymancer](https://github.com/mratsim/arraymancer) ⇐ provides a generic `Tensor[T]` type, 23 | similar to a Numpy `ndarray`. On top it defines operations from indexing, broadcasting 24 | and apply/map/fold/reduce operations to linear algebra and much more 25 | - [Neo](https://github.com/andreaferretti/neo) ⇐ provides primitives for linear algebra. This means 26 | it implements vectors and matrices, either with static or dynamic sizes. 27 | - [Datamancer](https://github.com/scinim/datamancer) ⇐ builds on top of Arraymancer to provide 28 | a runtime based `DataFrame` implementation. Runtime based means the types of columns are 29 | determined at runtime instead of compile time (e.g. via a schema). The focus is on column 30 | based operations. 31 | - [NimData](https://github.com/bluenote10/nimdata) ⇐ provides another `DataFrame` implementation, 32 | which - compared to Datamancer - has a stricter CT safety focus. Its implementation is 33 | row based and the `DataFrame` type is determined at compile time. Operations are built on top 34 | of iterators for lazy evaluation. 35 | 36 | ## Data visualization 37 | 38 | There are multiple libraries for data visualization ("plotting") available, 39 | each with their own focus and thus pros and cons. 40 | 41 | Beyond the libraries listed in this section, keep in mind that your favorite 42 | Python, Julia and R plotting library is only a [nimpy](https://github.com/yglukhov/nimpy), 43 | [nimjl](https://github.com/Clonkk/nimjl) and 44 | [Rnim](https://github.com/SciNim/Rnim) call away! 
45 | 46 | - [ggplotnim](https://github.com/Vindaar/ggplotnim) ⇐ pure Nim library 47 | for data visualization that is highly inspired by 48 | [ggplot2](https://ggplot2.tidyverse.org) for R. Also see the 49 | introduction [here](https://scinim.github.io/getting-started/data_viz/plotting_data.html). 50 | - [nim-plotly](https://github.com/SciNim/nim-plotly) ⇐ an interface to the JavaScript library 51 | [plotly.js](https://plotly.com/javascript/basic-charts/); generates plotly compatible JSON 52 | - [gnuplot.nim](https://github.com/dvolk/gnuplot.nim) ⇐ one of two 53 | available bindings to `gnuplot` 54 | - [gnuplotlib](https://github.com/planetis-m/gnuplotlib) ⇐ one of two 55 | available bindings to `gnuplot` 56 | - [asciigraph](https://github.com/KeepCoolWithCoolidge/asciigraph) ⇐ plots data using unicode symbols to draw 57 | pretty graphs in the terminal 58 | - [gr.nim](https://github.com/mantielero/gr.nim) ⇐ wrapper of the [GR visualization framework](https://gr-framework.org/) 59 | 60 | ## Numerical algorithms 61 | 62 | Numerical algorithms for integration, interpolation, (numerical) differentiation and 63 | solving differential equations are of course fundamental for scientific computing. 64 | 65 | - [Numericalnim](https://github.com/SciNim/numericalnim) ⇐ *the* most comprehensive 66 | library for numerical algorithms in Nim. Also see the integration 67 | tutorial [here](https://scinim.github.io/getting-started/numerical_methods/integration1d.html). 68 | - [Polynumeric](https://github.com/SciNim/polynumeric) ⇐ provides 69 | common operations (derivatives, root finding, etc.) of polynomials 70 | 71 | ## Optimization 72 | 73 | Optimization (possibly non-linear) problems are a problem domain large enough to deserve 74 | their own section beyond the "numerical algorithm" section. 75 | 76 | - [Numericalnim](https://github.com/SciNim/numericalnim) ⇐ provides some algorithms 77 | for non-linear optimization. 
These include Levenberg-Marquardt for non-linear curve fitting 78 | and (L)BFGS for general optimization problems. 79 | - [fitl](https://github.com/c-blake/fitl) ⇐ contains a pure Nim linear least squares solver (so 80 | no LAPACK dependency!) and provides many goodness-of-fit tests 81 | - [nimnlopt](https://github.com/Vindaar/nimnlopt) ⇐ wrapper of the [NLopt](https://nlopt.readthedocs.io/en/latest/) 82 | C library. It includes a large number of algorithms for non-linear 83 | optimization problems (gradient / non gradient & local / global 84 | methods) with support for constraints. 85 | - [nim-mpfit](https://github.com/Vindaar/nim-mpfit) ⇐ wrapper of the C library [cmpfit](https://pages.physics.wisc.edu/~craigm/idl/cmpfit.html), an implementation of the Levenberg-Marquardt algorithm for non-linear least squares problems (i.e. non-linear curve fitting). 86 | - [gsl-nim](https://github.com/YesDrX/gsl-nim) ⇐ wrapper for the [GNU 87 | Scientific Library](https://www.gnu.org/software/gsl/), which probably 88 | satisfies all your numerical optimization needs (and much more), if you can live with the GSL dependence and raw C API. 
89 | 90 | ## (Binary) data storage & serialization libraries 91 | 92 | - [nimhdf5](https://github.com/Vindaar/nimhdf5) ⇐ high level bindings for the HDF5 library 93 | - [netcdf](https://github.com/SciNim/netcdf) ⇐ wrapper for NetCDF library 94 | - [mcpl](https://github.com/SciNim/mcpl) ⇐ wrapper for MCPL library 95 | - Arrow 96 | - [freccia](https://github.com/SciNim/freccia) ⇐ pure Nim library for Apache Arrow format 97 | - [nimarrow_glib](https://github.com/emef/nimarrow_glib) ⇐ wrapper of libarrow 98 | - [nio](https://github.com/c-blake/nio) ⇐ also includes operations for binary data handling 99 | - [nimcfitsio](https://github.com/ziotom78/nimcfitsio) ⇐ wrapper for the CFITSIO library, typically used 100 | in astronomy 101 | - [nim-teafiles](https://github.com/andreaferretti/nim-teafiles) ⇐ library to read [TeaFiles](http://discretelogics.com/teafiles/), 102 | a format for fast read/write access to time series data 103 | - [CSVtools](https://github.com/andreaferretti/csvtools) ⇐ library for typed iterators on CSV files 104 | - [DuckDB](https://github.com/ayman-albaz/nim-duckdb) ⇐ DuckDB wrapper for Nim. DuckDB is a DB focused on fast data analysis 105 | 106 | ## Linear algebra 107 | 108 | A list of libraries for linear algebra operations. These libraries typically provide their own matrix and vector 109 | types and define common (and not so common) operations on them. 110 | 111 | - [Neo](https://github.com/andreaferretti/neo) ⇐ linear algebra library with support for dense and sparse 112 | matrices. Wraps BLAS & LAPACK and also has GPU support 113 | - [Manu](https://github.com/planetis-m/manu) ⇐ pure Nim library for operations on real, dense matrices (solving linear 114 | equations, determinants, matrix inverses & decompositions, ...)
115 | - [Arraymancer](https://github.com/mratsim/arraymancer) ⇐ Arraymancer also provides many linear algebra routines 116 | - [gsl-nim](https://github.com/YesDrX/gsl-nim) ⇐ GSL provides 117 | [many linear algebra](https://www.gnu.org/software/gsl/doc/html/linalg.html) routines 118 | 119 | ## Algebra 120 | 121 | - [emmy](https://github.com/andreaferretti/emmy) ⇐ Algebraic structures and operations on them 122 | - [nim-algebra](https://github.com/MichalMarsalek/nim-algebra) ⇐ implements many routines for rings, fields and groups 123 | 124 | ## Symbolic operations 125 | 126 | Libraries dealing with symbolic instead of numeric operations. 127 | 128 | - [astgrad](https://github.com/SciNim/astgrad) ⇐ symbolic derivatives based on Nim AST 129 | - [symbolicnim](https://github.com/hugogranstrom/symbolicnim) ⇐ pure Nim library for symbolic computations 130 | - [symengine](https://github.com/SciNim/symengine.nim) ⇐ wrapper for the C++ library for symbolic computations 131 | 132 | ## Number types 133 | 134 | These libraries all provide specific data types suited to certain kinds of operations.
135 | 136 | - decimal libraries 137 | - [nim-decimal](https://github.com/status-im/nim-decimal) ⇐ decimal library wrapping C lib `mpdecimal` 138 | - [decimal128](https://github.com/JohnAD/decimal128) ⇐ pure Nim decimal library, missing some features 139 | - multi-precision integers (bigints) 140 | - [bignum](https://github.com/SciNim/bignum) ⇐ wrapper of GMP providing arbitrary precision ints & rationals, does not wrap `mpfr` (so no multi precision floats) 141 | - [bigints](https://github.com/nim-lang/bigints) ⇐ pure Nim bigint library 142 | - [theo](https://github.com/SciNim/theo) ⇐ optimized bigint library, WIP 143 | - [fpn](https://gitlab.com/lbartoletti/fpn) ⇐ fixed point number library in pure Nim 144 | - [stdlib rationals](https://nim-lang.github.io/Nim/rationals.html) ⇐ Nim standard library module for rational numbers 145 | - [stdlib complex](https://nim-lang.github.io/Nim/complex.html) ⇐ Nim standard library module for complex numbers 146 | 147 | ## Statistics, sampling and random number generation 148 | 149 | - [statistical-tests](https://github.com/ayman-albaz/statistical-tests) 150 | - [linear-models](https://github.com/ayman-albaz/linear-models) 151 | - [distributions](https://github.com/ayman-albaz/distributions) 152 | - [stdlib stats](https://nim-lang.github.io/Nim/stats.html) ⇐ basic statistics module from the stdlib. Supports moments up to 153 | kurtosis & provides basic regression support 154 | - [alea](https://github.com/andreaferretti/alea) ⇐ library for sampling from many different distributions. Allows wrapping custom (e.g.
stdlib) RNGs 155 | - [sitmo](https://github.com/jxy/sitmo) ⇐ Nim implementation of the Sitmo parallel RNG 156 | - [stdlib random](https://nim-lang.github.io/Nim/random.html) ⇐ random number generation of the Nim standard library 157 | - [nim-random](https://github.com/oprypin/nim-random) ⇐ alternative to the Nim stdlib random number library 158 | - [nim-mentat](https://github.com/ruivieira/nim-mentat) ⇐ implements 159 | exponentially weighted moving averages 160 | - [fitl](https://github.com/c-blake/fitl) ⇐ contains a submodule `dists` for random sampling, mixing of distributions, (inverse) CDFs 161 | 162 | ## Machine learning 163 | 164 | - [Flambeau](https://github.com/SciNim/flambeau) ⇐ as a wrapper to [libtorch](https://pytorch.org/cppdocs/installing.html) provides 165 | access to state-of-the-art ML features 166 | - [Arraymancer](https://github.com/mratsim/arraymancer) ⇐ Arraymancer implements a DSL to define neural networks 167 | (see the [examples](https://github.com/mratsim/Arraymancer/tree/master/examples)) and provides other, more primitive 168 | ML tools (PCA, ...) 
169 | - [exprgrad](https://github.com/can-lehmann/exprgrad) ⇐ Experimental deep learning framework, based on an 170 | easily extensible LLVM compiled differentiable programming language 171 | - [DecisionTreeNim](https://github.com/Michedev/DecisionTreeNim) ⇐ implements decision trees & random forests 172 | 173 | ## Natural language processing 174 | 175 | - [tome](https://github.com/dizzyliam/tome) ⇐ provides tokenization and parts of speech (POS) tagging 176 | - [word2vec](https://github.com/treeform/word2vec) ⇐ [Word2vec](https://en.wikipedia.org/wiki/Word2vec) implementation in Nim 177 | - [fastText](https://github.com/Nim-NLP/fastText) ⇐ library to perform predictions of [fastText](https://github.com/facebookresearch/fastText) models 178 | - [scim](https://github.com/xflywind/scim) ⇐ helpful tools for speech recognition based on Arraymancer 179 | 180 | ## Spatial data structures, distance measures & clustering algorithms 181 | 182 | - [kdtree](https://github.com/jblindsay/kdtree) ⇐ k-d tree implementation in pure Nim 183 | - [RTree](https://github.com/stefansalewski/RTree) ⇐ R- and R*-Tree implementations in pure Nim 184 | - [QuadtreeNim](https://github.com/Nycto/QuadtreeNim) ⇐ Quadtree implementation in pure Nim 185 | - [distances](https://github.com/ayman-albaz/distances) ⇐ library to compute distances under different metrics 186 | with support for standard sequences, arraymancer & neo types 187 | - [arraymancer](https://github.com/mratsim/arraymancer) ⇐ arraymancer contains a k-d tree implementation, multiple 188 | distance metrics (incl.
user defined custom metrics) plus k-means & DBSCAN clustering algorithms 189 | - [spacy](https://github.com/treeform/spacy) ⇐ collection of different spatial data structures 190 | - [DelaunayNim](https://github.com/Nycto/DelaunayNim) ⇐ library to compute the [Delaunay triangulation](https://en.wikipedia.org/wiki/Delaunay_triangulation) of a set of points 191 | - [nim-mentat](https://github.com/ruivieira/nim-mentat) ⇐ implements Balanced Box-Decomposition trees 192 | 193 | ## Special functions 194 | 195 | These libraries implement different [special functions](https://en.wikipedia.org/wiki/Special_functions). 196 | 197 | - [stdlib math](https://nim-lang.github.io/Nim/math.html) ⇐ The Nim standard library `math` module contains all libraries 198 | you find in `math.h`. 199 | - [spfun](https://github.com/c-blake/spfun) ⇐ library for many special functions used in stats, physics, ... 200 | - [gsl-nim](https://github.com/YesDrX/gsl-nim) ⇐ The GSL probably provides 201 | [any special function](https://www.gnu.org/software/gsl/doc/html/specfunc.html) you may need 202 | - [special-functions](https://github.com/ayman-albaz/special-functions) ⇐ contains many special functions, which are not part of the stdlib module 203 | 204 | ## FFT 205 | 206 | - [nimfftw3](https://github.com/SciNim/nimfftw3) ⇐ FFTW3 wrapper 207 | - [impulse](https://github.com/SciNim/impulse) ⇐ pocket FFT wrapper, in principle a repository for signal processing primitives 208 | - [kissFFT](https://github.com/m13253/nim-kissfft) ⇐ kissFFT wrapper 209 | 210 | ## Primitive compute wrappers 211 | 212 | - [nimcuda](https://github.com/andreaferretti/nimcuda) ⇐ wrapper for CUDA 213 | - [nimblas](https://github.com/andreaferretti/nimblas) ⇐ wrapper for BLAS 214 | - [nimlapack](https://github.com/andreaferretti/nimlapack) ⇐ wrapper for LAPACK 215 | - [nimcl](https://github.com/andreaferretti/nimcl) ⇐ wrapper for OpenCL 216 | 217 | ## Multithreading, multiprocessing & asynchronous processing 218 | 219 | - 
[weave](https://github.com/mratsim/weave) ⇐ very low overhead, high performance multithreading runtime 220 | - [taskpools](https://github.com/status-im/nim-taskpools) ⇐ lightweight threadpool implementation 221 | - [threadpools](https://github.com/yglukhov/threadpools) ⇐ Custom threadpool implementation 222 | - [threading](https://github.com/nim-lang/threading) ⇐ New pieces for multithreading in times of ARC/ORC 223 | - [asynctools](https://github.com/cheatfate/asynctools) ⇐ Various async tools for usage with Nim's stdlib `async` macro 224 | - [asyncthreadpool](https://github.com/yglukhov/asyncthreadpool) ⇐ An 225 | awaitable threadpool implementation 226 | - [cligen](https://github.com/c-blake/cligen) ⇐ contains a `procpool` submodule for easy multi*processing* 227 | 228 | ## Biology 229 | 230 | - [hts-nim](https://github.com/brentp/hts-nim) ⇐ A wrapper for [htslib](https://github.com/samtools/htslib) for Nim for parsing of 231 | genomics data files 232 | - [bionim](https://github.com/SciNim/bionim) ⇐ collection of data structures and algorithms for bioinformatics 233 | - [bio](https://github.com/SciNim/bio) ⇐ a library for working with biological sequences 234 | 235 | ## Physics & astronomy 236 | 237 | - [unchained](https://github.com/SciNim/Unchained) ⇐ library for CT checking of physical units and 238 | automatic conversion between units 239 | - [qex](https://github.com/jcosborn/qex/) ⇐ lattice QCD library 240 | - [mclimit](https://github.com/SciNim/mclimit) ⇐ Nim port of the ROOT TLimit class for confidence level computations (limits) for experiments with small statistics 241 | - [nim-constants](https://github.com/SciNim/nim-constants) ⇐ contains many physical and mathematical constants 242 | - [astroNimy](https://github.com/dizzyliam/astroNimy) ⇐ astronomical image processing library 243 | - [orbits](https://github.com/treeform/orbits) ⇐ library for orbital mechanics calculations 244 | - [nim-root](https://github.com/watson-ij/nim-root) ⇐ partial wrapper for 
[CERN's ROOT](https://root.cern.ch) 245 | - [MDevolve](https://github.com/jxy/MDevolve) ⇐ integrator framework for molecular dynamic evolutions 246 | - [polypbren](https://github.com/guibar64/polypbren) ⇐ program to 247 | compute renormalized parameters of charged colloids 248 | - [xrayAttenuation](https://github.com/SciNim/xrayAttenuation) ⇐ for 249 | calculations of X-ray transmissions through materials & X-ray reflectivity under grazing angles 250 | 251 | ## Mathematics 252 | 253 | - [perms-nim](https://github.com/remigijusj/perms-nim) ⇐ library for permutation group calculations and factorization algorithms 254 | 255 | ## Other useful libraries 256 | 257 | - [scinim](https://github.com/SciNim/scinim) ⇐ library of general scientific things that are 258 | either primitives or too small to have their own library 259 | - [Measuremancer](https://github.com/SciNim/Measuremancer) ⇐ library for automatic error propagation 260 | of measurement uncertainties 261 | - [gsl-nim](https://github.com/YesDrX/gsl-nim) ⇐ wrapper for GSL (GNU Scientific Library) 262 | - [nim-opencv](https://github.com/dom96/nim-opencv) ⇐ Nim wrapper for [OpenCV](https://en.wikipedia.org/wiki/OpenCV) 263 | - [zero-functional](https://github.com/zero-functional/zero-functional) ⇐ library for zero cost chaining of 264 | functional primitives (map, apply, fold, ...). Fuses multiple operations into a single loop. 265 | - [iterrr](https://github.com/hamidb80/iterrr) ⇐ another library for zero cost chaining, similar to `zero-functional`. Aims to be 266 | easier to extend. 267 | - [flower](https://github.com/dizzyliam/flower) ⇐ pure Nim bloom filter, probabilistic data structure to check if 268 | elements are in a set ("possibly in set" vs. "definitely not in set"). Supports arbitrary Nim types in a single filter. 269 | - pattern matching: 270 | - [pattern matching in fusion](https://github.com/nim-lang/fusion/blob/master/src/fusion/matching.rst) ⇐ pattern 271 | matching for Nim.
Possibly the most feature-rich pattern matching library for Nim. Future developments might 272 | be found [here](https://github.com/haxscramper/hmatching) 273 | - [patty](https://github.com/andreaferretti/patty) 274 | - [gara](https://github.com/alehander92/gara) 275 | - [Synthesis](https://github.com/mratsim/Synthesis) ⇐ DSL to generate statically checked state machines 276 | - [jupyternim](https://github.com/stisa/jupyternim) ⇐ Jupyter kernel for Nim 277 | - [cligen](https://github.com/c-blake/cligen) ⇐ elegant library to write CLI interfaces 278 | - [LatexDSL](https://github.com/Vindaar/LatexDSL) ⇐ DSL to generate CT-checked LaTeX strings, supporting Nim variable interpolation 279 | - [nim-mathexpr](https://github.com/Yardanico/nim-mathexpr) ⇐ mathematical string expression evaluator library 280 | - [nim-pari](https://codeberg.org/BarrOff/nim-pari) ⇐ wrapper for the [PARI](https://pari.math.u-bordeaux.fr/) C library underlying 281 | the PARI/GP computer algebra system 282 | - [memo](https://github.com/andreaferretti/memo) ⇐ macro library to allow memoization of function calls (automatic caching of 283 | function calls) 284 | - [forematics](https://github.com/treeform/forematics) ⇐ Nim implementation of a [Metamath](http://us.metamath.org/) verifier 285 | - [DeepLearningNim](https://github.com/Niminem/DeepLearningNim) ⇐ example of building a DQN with arraymancer 286 | 287 | ## Educational resources 288 | 289 | - [nim-bayes](https://github.com/kerrycobb/nim-bayes) ⇐ Tutorial about Bayesian inference of a linear model in Nim 290 | 291 | ## Language bindings 292 | 293 | First of all, Nim itself of course provides direct support for wrapping C and C++ libraries 294 | using its FFI. See the Nim manual [here](https://nim-lang.github.io/Nim/manual.html#foreign-function-interface) 295 | for an introduction to the C / C++ FFI.
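As a small illustration (a minimal sketch; `cbrt` is the standard C cube root function from `math.h`), wrapping a C function usually amounts to a single `importc` declaration:

```nim
# Bind the C `cbrt` (cube root) function from the C math library.
proc cbrt(x: cdouble): cdouble {.importc, header: "<math.h>".}

echo cbrt(27.0)  # 3.0
```

On some systems one additionally needs to link against libm, e.g. via `{.passL: "-lm".}`.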
296 | 297 | For more details on how to use the language specific bindings, see the section 298 | about it [here](https://scinim.github.io/getting-started/external_language_integration/index.html) 299 | 300 | ### Direct language bridges 301 | - [nimjl](https://github.com/Clonkk/nimjl) ⇐ bridge to Julia 302 | - [nimpy](https://github.com/yglukhov/nimpy) ⇐ bridge to Python 303 | - [Rnim](https://github.com/SciNim/Rnim) ⇐ bridge to R 304 | 305 | ### Tools to wrap C / C++ 306 | 307 | - [c2nim](https://github.com/nim-lang/c2nim) ⇐ the default Nim tool to generate Nim wrappers of C header files 308 | - [futhark](https://github.com/PMunch/futhark) ⇐ automatic imports of C header files in Nim code 309 | - [nimterop](https://github.com/nimterop/nimterop) ⇐ library to simplify wrapping of C/C++ using [tree-sitter](http://tree-sitter.github.io/tree-sitter/) 310 | -------------------------------------------------------------------------------- /book/basics/data_wrangling.nim: -------------------------------------------------------------------------------- 1 | # cannot import `Value` because it clashes with `mustache.values.Value`. This is fixed for 2 | # Nim version >= 1.5 (fully qualified type for formulas used) but important on < 1.5 (cannot 3 | # fully qualify types there) 4 | import nimib except Value 5 | import nimibook 6 | import datamancer 7 | 8 | nbInit(theme = useNimibook) 9 | 10 | nbText: """ 11 | # Data wrangling using the `DataFrame` from [Datamancer](https://github.com/SciNim/Datamancer) 12 | 13 | The third major data type often encountered is a `DataFrame`. 14 | 15 | Data frames can be thought of as multiple, named tensors of possibly different types 16 | in one object. A data frame library then is supposed to make working with such data 17 | as convenient and powerful as possible. 
18 | 19 | In the specific case of Datamancer, the data structure is essentially an 20 | `OrderedTable[string, Column]`, where `Column` is a variant object storing one 21 | of 5 different `Tensor[T]` types. 22 | 23 | In order to use Datamancer, you must first import it. In addition to that, in this tutorial 24 | we will also import the [Arraymancer](https://github.com/mratsim/Arraymancer) tensor library, 25 | which we will use to demonstrate that you can create dataframes from Arraymancer tensors. 26 | 27 | """ 28 | nbCode: 29 | import datamancer 30 | import arraymancer 31 | nbText: """ 32 | 33 | ## Construction of a `DataFrame` 34 | 35 | A `DataFrame` from the Datamancer library can be constructed in two different ways. Either 36 | from an input CSV file or from existing sequences or tensors. 37 | 38 | Construction from a CSV file is performed using the `readCsv` procedure. It provides multiple 39 | different options (different separators, skipping lines, header symbols, ...), but for a 40 | regular comma separated value file, the defaults are fine. For example: 41 | 42 | ```nim 43 | let df1 = readCsv("foo.csv") 44 | echo df1 45 | ``` 46 | 47 | Alternatively, if one already has a mix of sequences and tensors of the same length: 48 | """ 49 | nbCode: 50 | let s1 = [1, 2, 3] 51 | let s2 = @["hello", "foo", "bar"] 52 | let s3 = @[1.5, 2.5, 3.5].toTensor 53 | let df2 = toDf(s1, s2, s3) 54 | echo df2 55 | echo "Column names: ", df2.getKeys() ## getKeys only returns the column names 56 | nbText: """ 57 | which creates a data frame with three columns named `"s1", "s2", "s3"`. We can see that 58 | mixing different input types is not a problem. The supported types are 59 | - `float` 60 | - `int` 61 | - `string` 62 | - `bool` 63 | 64 | and a mix of them in one column. 65 | 66 | Printing a data frame by default prints the first 20 rows. This can be adjusted by calling 67 | the `pretty` procedure manually and handing the number of rows (-1 for all). 
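For example (a sketch, assuming as described above that the first argument to `pretty` is the number of rows to print):

```nim
echo df2.pretty(-1) ## print *all* rows of `df2`
```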
68 | 69 | In addition one can always view a data frame in the browser by doing `showBrowser(df)` where 70 | `df` is the data frame to view. 71 | 72 | If one wishes to name the columns differently from construction (they can be renamed later 73 | as well), it is done by: 74 | """ 75 | nbCode: 76 | let df3 = toDf({"Id" : s1, "Word" : s2, "Number" : s3}) 77 | echo df3 78 | nbText: """ 79 | 80 | Finally, one can also create a `DataFrame` starting from an empty object and 81 | assigning sequences, tensors or scalar values manually: 82 | """ 83 | nbCodeInBlock: 84 | var df = newDataFrame() 85 | df["x"] = @[1, 2, 3] ## assign a sequence. This sets the `DataFrame` length to 3 86 | df["y"] = @[4.0, 5.0, 6.0].toTensor ## assign a tensor. Input now `must` match length 3 87 | try: 88 | df["z"] = @[5, 6] ## raises 89 | except ValueError: discard ## type of exception might change in the future 90 | df["z"] = constantColumn(1, df.len) ## assign a constant column of integers. 91 | nbText: """ 92 | 93 | ## Accessing data underlying a column 94 | 95 | The data stored in a column of a data frame can always be accessed easily. Because the 96 | data is stored in a variant object, the user needs to supply the data type to use to 97 | read the data as. Nim does *not* allow return type overloading, which means we cannot 98 | use the runtime information about the types to return the "correct" tensor. All we can 99 | make sure is that accessing the data with the *correct* type is a no-op. 100 | 101 | This has the downside that an invalid type will produce a runtime error. On the upside 102 | it allows us to perform type conversions directly, for instance reading an integer column 103 | as floats or any column as strings. 
104 | 105 | The syntax is as follows: 106 | """ 107 | nbCodeInBlock: 108 | let df = toDf({"x" : @[1, 2, 3], "y" : @[4.0, 5.0, 6.0]}) 109 | let t1: Tensor[int] = df["x", int] ## this is a no-op 110 | let t2: Tensor[float] = df["x", float] ## converts integers to floats 111 | let t3: Tensor[float] = df["y", float] ## also a no-op 112 | let t4: Tensor[string] = df["x", string] ## convert to string 113 | try: 114 | let t5: Tensor[bool] = df["x", bool] ## would produce a runtime error 115 | except ValueError: discard ## type of exception might be changed in the future 116 | nbText: """ 117 | where we indicate the types explicitly on the left hand side for clarity. 118 | 119 | This means we can in principle always access individual elements of a data frame column 120 | by getting the tensor and accessing elements from it. Of course this has some overhead, 121 | but due to reference semantics it is relatively cheap (no data is copied, unless type 122 | conversions need to be performed). 123 | 124 | ## Computing single column aggregations 125 | 126 | As we saw in the previous section, accessing a tensor of a column is cheap. We can 127 | use that to perform aggregations on full columns: 128 | """ 129 | nbCodeInBlock: 130 | let df = toDf({"x" : @[1, 2, 3], "y" : @[4.0, 5.0, 6.0]}) 131 | echo df["x", int].sum 132 | echo df["y", float].mean 133 | nbText: """ 134 | and in that sense any operation acting on tensors can be used. 135 | 136 | ## Data frame operations 137 | 138 | In the more general case (the reason one uses a data frame in the first place) we 139 | don't want to only consider a single column. 140 | 141 | Many different operations are supported, but can be grouped into a few general procedures. 142 | 143 | Some of the procedures of Datamancer take so called `FormulaNodes`. They are essentially 144 | a domain specific language to succinctly express operations on data frame columns 145 | without the need to fully refer to them. 
Their basic construction and usage should become 146 | clear in the code below. The Datamancer documentation contains a much deeper introduction 147 | into the specifics here: 148 | 149 | [Formula introduction](https://scinim.github.io/Datamancer/datamancer.html#formulas) 150 | 151 | ### `select` - Selecting a subset of columns 152 | 153 | If we have a data frame with multiple columns we may want to keep only 154 | a subset of these going forward. This can be achieved using `select`: 155 | """ 156 | nbCodeInBlock: 157 | var df = newDataFrame() 158 | for i in 0 ..< 100: 159 | df["x" & $i] = @[1 + i, 2 + i, 3 + i] 160 | echo df.select("x1", "x50", "x99") 161 | nbText: """ 162 | which drops every column not selected. 163 | 164 | The inverse is also possible using `drop`: 165 | """ 166 | nbCodeInBlock: 167 | let df = toDf({"x" : @[1, 2, 3], "y" : @[4.0, 5.0, 6.0], "z" : @["a", "b", "c"]}) 168 | echo df.drop("x") 169 | nbText: """ 170 | 171 | ### `rename` - Renaming a column 172 | 173 | `rename`, as the name implies, is used to rename columns. Usage is rather simple. We'll 174 | get our first glance at the `f{}` macro to generate a `FormulaNode` here: 175 | """ 176 | nbCodeInBlock: 177 | let df = toDf({"x" : @[1, 2, 3], "y" : @[4.0, 5.0, 6.0]}) 178 | echo df.rename(f{"foo" <- "x"}) 179 | nbText: """ 180 | So we can see that we simply assign `<-` the old name "x" to the new name "foo". 181 | 182 | ### `arrange` - Sorting a data frame 183 | 184 | Often we wish to sort a data frame by one or more columns. This is done using `arrange`. 185 | It can take one or more columns to sort by, where for multiple columns the order 186 | of the inputs decides the precedence of what to sort by first, the later columns only 187 | used to break ties between the former. 188 | 189 | The sort order is handled in the same way as in Nim's standard library, i.e. using 190 | an `order` argument that takes either `SortOrder.Ascending` or `SortOrder.Descending`. 
191 | The default order is ascending order. 192 | """ 193 | nbCodeInBlock: 194 | let df = toDf({ "x" : @[4, 2, 7, 4], "y" : @[2.3, 7.1, 3.3, 1.0], 195 | "z" : @["b", "c", "d", "a"]}) 196 | echo df.arrange("x") ## sort by `x` in ascending order (default) 197 | echo df.arrange("x", order = SortOrder.Descending) ## sort in descending order 198 | echo df.arrange(["x", "z"]) ## sort by two columns, first `x` then `z` to break ties 199 | nbText: """ 200 | 201 | ### `unique` - Removing duplicate rows 202 | 203 | Another useful operation is removal of duplicate entries. `unique` is the procedure 204 | to use. If no argument is given uniqueness is determined based on *all* existing 205 | columns. This is not always the most desired option of course, which is why `unique` 206 | accepts a variable number of columns. Then only uniqueness among these columns is 207 | considered. 208 | """ 209 | nbCodeInBlock: 210 | let df = toDf({ "x" : @[1, 2, 2, 2, 4], "y" : @[5.0, 6.0, 7.0, 8.0, 9.0], 211 | "z" : @["a", "b", "b", "d", "e"]}) 212 | echo df.unique() ## consider uniqueness of all columns, nothing removed 213 | echo df.unique("x") ## only consider `x`, only keeps 1st, 2nd, last row 214 | echo df.unique(["x", "z"]) ## considers `x` and `z`, one more unique (4th row) 215 | nbText: """ 216 | 217 | ### `mutate` - Creating new or modifying existing columns 218 | 219 | `mutate` is the procedure to use to add new columns to a data frame or modify 220 | existing ones. For this procedure we need to hand formulas using the `f{}` macro 221 | again. Here it is advisable to name the formulas. Instead of the above assignment 222 | operator `<-` we now use the "x depends on y" operator `~`. 223 | 224 | Further, to refer to a column in the computation we perform we will use accented 225 | quotes. This is all the complexity of that macro we will discuss in this introduction.
226 | 227 | Let's compute the sum of two columns to get a feel: 228 | """ 229 | nbCodeInBlock: 230 | let df = toDf({ "x" : @[1, 2, 3], "y" : @[10, 11, 12] }) 231 | echo df.mutate(f{"x+y" ~ `x` + `y`}) 232 | nbText: """ 233 | Of course we can use constants and local Nim symbols as well: 234 | """ 235 | nbCodeInBlock: 236 | let df = toDf({ "x" : @[1, 2, 3]}) 237 | echo df.mutate(f{"x+5" ~ `x` + 5 }) 238 | let y = 2.0 239 | echo df.mutate(f{"x + local y" ~ `x` + y}) 240 | nbText: """ 241 | Note: There is a slight subtlety at play here. If you look closely at the output of 242 | these two `mutate` commands you see that in the first case the resulting column is 243 | of type `int`, whereas in the second case it's `float`. That is because the type 244 | of the column is deduced based on the types in the rest of the formula. `5` is an 245 | `int` so `x` is read as integers in the first case, whereas `y` is a `float` and so 246 | `x` is read as a `float`. See the Datamancer documentation on details and how to 247 | specify the types manually. 248 | 249 | And as stated we can also overwrite columns: 250 | """ 251 | nbCodeInBlock: 252 | let df = toDf({ "x" : @[1, 2, 3] }) 253 | echo df.mutate(f{"x" ~ `x` + `x`}) 254 | nbText: """ 255 | 256 | Under the hood these formulas are converted into a closure that takes a data frame 257 | as an input. The column references are extracted and converted into a preamble 258 | that reads the corresponding tensors. Then we run over the relevant tensors and 259 | perform the described operation for each element. The result is assigned to 260 | a resulting tensor, which is assigned as the new column. 261 | 262 | The only restriction on the body of the formula is that it's a valid Nim expression 263 | (if one mentally replaces column references by tensor elements) that returns a 264 | value of a valid data type for a data frame. 
265 | 266 | If one wishes the same behavior as `mutate` but does not require the columns anymore 267 | that are not explicitly created / modified using a formula, there is `transmute` for 268 | this purpose. Otherwise it is equivalent to `mutate`. 269 | 270 | ### `filter` - Removing rows based on a predicate 271 | 272 | These mentioned formulas can of course also return boolean values. In combination 273 | with the `filter` procedure this allows us to remove rows of a data frame that 274 | fail to pass a condition (or a "predicate"). 275 | """ 276 | nbCodeInBlock: 277 | let df = toDf({ "x" : @[1, 2, 3, 4, 5], "y" : @["a", "b", "c", "d", "e"] }) 278 | echo df.filter(f{ `x` < 3 or `y` == "e" }) 279 | nbText: """ 280 | 281 | ### `summarize` - Computing aggregations on a full data frame 282 | 283 | The approach described in "Computing single column aggregations" can be useful for 284 | simple single column operations, but does not scale well. That's what `summarize` is 285 | for. Here we use the last operator used in the `f{}` macro, namely the reduction 286 | `<<` operator: 287 | """ 288 | nbCodeInBlock: 289 | let df = toDf({ "x" : @[1, 2, 3, 4, 5], "y" : @[5, 10, 15, 20, 25] }) 290 | echo df.summarize(f{float: mean(`x`) }) ## compute mean, auto creates a column name 291 | echo df.summarize(f{float: "mean(x)" << mean(`x`) }) ## same but with a custom name 292 | echo df.summarize(f{"mean(x)+sum(y)" << mean(`x`) + sum(`y`) }) 293 | nbText: """ 294 | Keen eyes will notice the `float:` at the beginning of the first two examples. This is 295 | a "type hint" for the formula, because the symbol "mean" is overloaded in Nim. But not 296 | by a few distinct procedures, but generically. At this moment there are no heuristics 297 | involved to choose one type over another in a generic case. Therefore, we don't know 298 | what type `x` should be read as. So we overwrite the input type manually and give the 299 | macro a hint. 
300 | 301 | If we leave out the type hint, we are greeted with a message listing the type 302 | information found for `mean` and asking us to consider giving such a type hint. 303 | 304 | The situation is slightly different for the last case, in which an addition is involved. 305 | Due to some heuristic rules involving the most basic operators (maths and boolean) we can 306 | determine here that the input is probably supposed to be float. 307 | 308 | ### `group_by` 309 | 310 | `summarize` and the other procedures can be spiced up if used in combination with 311 | `group_by`. 312 | 313 | `group_by` by itself doesn't perform any operations. It simply returns a new data frame 314 | with the exact same data that is now "grouped" by one or more columns. These columns should 315 | be columns containing *discrete* data. This grouping can be used (manually or indirectly) 316 | via the `groups` iterator. It yields all "sub data frames" contained in the grouped data 317 | frame. These sub data frames are those of duplicate entries in the columns that we have 318 | grouped by. It essentially yields everything as a sub data frame that would be reduced 319 | to a single row if using `unique` on the same columns as grouped by. 320 | 321 | This should become clearer with an example: 322 | """ 323 | nbCodeInBlock: 324 | let df = toDf({ "Class" : @["A", "C", "B", "B", "A", "C", "C"], 325 | "Num" : @[1, 5, 3, 4, 8, 7, 2] }) 326 | .group_by("Class") 327 | for t, subDf in groups(df): 328 | echo "Sub data frame: ", t 329 | echo subDf 330 | nbText: """ 331 | We can see we have 3 sub data frames. One for each discrete value found in column `Class`. 332 | 333 | The really interesting application of `group_by`, though, is its combination with one 334 | of the other procedures shown above, in particular `summarize`, `filter` and `mutate`. 335 | For a grouped data frame these operations will then be performed *group wise*.
Operations 336 | that only use information of a single row are unaffected by this. But any formula that 337 | includes a reference to a full column (`mean, sum, ...`) will compute this value per 338 | group. 339 | 340 | A few examples: 341 | - `summarize` 342 | """ 343 | nbCodeInBlock: 344 | let df = toDf({ "Class" : @["A", "C", "B", "B", "A", "C", "C", "A", "B"], 345 | "Num" : @[1, 5, 3, 4, 8, 7, 2, 0, 0] }) 346 | echo df.group_by("Class").summarize(f{int: "sum(Num)" << sum(`Num`)}) 347 | nbText: """ 348 | We can see this computes the sum for each class now. 349 | - `filter`: 350 | """ 351 | nbCodeInBlock: 352 | let df = toDf({ "Class" : @["A", "C", "B", "B", "A", "C", "C", "A", "B"], 353 | "Num" : @[1, 5, 3, 4, 8, 7, 2, 0, 0] }) 354 | echo df.group_by("Class").filter(f{ sum(`Num`) <= 9 }) 355 | nbText: """ 356 | and again, the filtering is done per group. Note that on an ungrouped data frame a filtering 357 | operation using a reducing formula would usually not make much sense anyway. 358 | - `mutate`: 359 | """ 360 | nbCodeInBlock: 361 | let df = toDf({ "Class" : @["A", "C", "B", "B", "A", "C", "C", "A", "B"], 362 | "Num" : @[1, 5, 3, 4, 8, 7, 2, 0, 0] }) 363 | echo df.group_by("Class").mutate(f{"Num - mean" ~ `Num` - mean(`Num`)}) 364 | nbText: """ 365 | where we subtract the mean (of each class!) from each observation. 366 | 367 | If one uses multiple columns to group by, we instead get the sub data frame corresponding 368 | to each unique combination of discrete values. Feel free to play around and try out 369 | such an example! 370 | 371 | ### `gather` - Converting a wide format data frame to long format 372 | 373 | As one of the last things to cover, we will quickly talk about data frames in wide and 374 | long format. In a way the example data frame above with a column "Class" and a column 375 | "Num" can be considered a data frame in "long" format. Long format in the sense that 376 | we have one discrete column "Class" that maps to different "Num" values.
Because the 377 | column "Class" contains *discrete* values, we can imagine "transposing" the data frame 378 | to columns "A", "B", "C" instead with the values for each of these *groups* as the values 379 | in the corresponding columns. Let's look at: 380 | - this data frame 381 | - the output of grouping that data frame by "Class" 382 | - the same data frame in wide format 383 | 384 | for clarity: 385 | """ 386 | nbCodeInBlock: 387 | let dfLong = toDf({ "Class" : @["A", "C", "B", "B", "A", "C", "C", "A", "B"], 388 | "Num" : @[1, 5, 3, 4, 8, 7, 2, 0, 0] }) 389 | echo "Long format:\n", dfLong 390 | echo "----------------------------------------" 391 | echo "Grouping by `Class`:" 392 | for _, subDf in groups(dfLong.group_by("Class")): 393 | echo subDf 394 | echo "----------------------------------------" 395 | let dfWide = toDf({"A" : [1, 8, 0], "B" : [3, 4, 0], "C" : [5, 7, 2]}) 396 | echo "Wide format:\n", dfWide 397 | nbText: """ 398 | As we can see, the difference between wide and long format is the way the `group_by` results 399 | are "assembled": as different columns for each group (wide format) or as two (key, value) 400 | columns (long format). 401 | 402 | The conversion from wide -> long format is always possible. But the mapping of long -> wide 403 | format requires there to be the same number of entries for each class. If that condition 404 | is not satisfied, there will be missing values in the columns of the separate classes. 405 | 406 | Depending on circumstances one might have input data in either order. However, in particular 407 | for plotting purposes the long format is often more convenient as it allows us to classify the 408 | discrete classes using different colors, shapes etc. automatically. 409 | 410 | Therefore, there is the `gather` procedure to convert a wide format data frame into a 411 | long format one.
It takes the columns to be "gathered", the name of the column containing 412 | the "keys" (the column from which a value came) and a name for the column of the "values" 413 | that were "gathered". We can use it to recover the ("Class", "Num") data frame from 414 | the last one: 415 | """ 416 | nbCodeInBlock: 417 | let df = toDf({"A" : [1, 8, 0], "B" : [3, 4, 0], "C" : [5, 7, 2]}) 418 | echo df.gather(df.getKeys(), ## get all keys to gather 419 | key = "Class", ## the name of the `key` column 420 | value = "Num") 421 | nbText: """ 422 | which is exactly the same data frame as in the examples before. 423 | 424 | (Note: the inverse procedure to convert a long format data frame back into wide format 425 | is currently still missing. It will be added soon) 426 | 427 | ### `innerJoin` - joining two data frames by a common column 428 | 429 | As the last common example of data frame operations, we shall consider joining two 430 | data frames by a common column. 431 | """ 432 | nbCodeInBlock: 433 | let df1 = toDf({ "Class" : @["A", "B", "C", "D", "E"], 434 | "Num" : @[1, 5, 3, 4, 6] }) 435 | let df2 = toDf({ "Class" : ["E", "B", "A", "D", "C"], 436 | "Ids" : @[123, 124, 125, 126, 127] }) 437 | echo innerJoin(df1, df2, by = "Class") 438 | nbText: """ 439 | where we joined two data frames by the "Class" column, resulting in a data frame with 440 | 3 columns. The matching rows for the classes were put together aligning corresponding 441 | "Num" and "Ids" values. 442 | 443 | Of course joining two data frames is only a sensible option for a column containing 444 | discrete data so that equal elements in that column for both input data frames can 445 | be found. 446 | 447 | This already covers the *majority* of the API of Datamancer. There are more procedures, 448 | but the presented ones should be all that is needed in the vast majority of use cases. 
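One of those additional procedures is `transmute`, mentioned earlier. As a minimal sketch of the difference to `mutate` (using the same `toDf` / formula syntax as above; the column name "xDouble" is just a hypothetical label):

```nim
import datamancer

let df = toDf({ "x" : @[1, 2, 3], "y" : @["a", "b", "c"] })
## `transmute` keeps *only* the columns created by its formulas,
## so "y" is dropped here; `mutate` with the same formula would keep it
echo df.transmute(f{"xDouble" ~ `x` * 2})
```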
449 | 450 | Check out the [Datamancer documentation](https://scinim.github.io/Datamancer/datamancer.html) 451 | for a full picture and in particular for a better and more thorough introduction to the 452 | formula syntax. 453 | """ 454 | nbSave 455 | -------------------------------------------------------------------------------- /book/data_viz/plotting_data.nim: -------------------------------------------------------------------------------- 1 | import nimib, nimibook 2 | import ggplotnim 3 | 4 | nbInit(theme = useNimibook) 5 | 6 | nbText: """ 7 | # Plotting data using [ggplotnim](https://github.com/Vindaar/ggplotnim) 8 | 9 | In this tutorial we will introduce `ggplotnim`, a Nim plotting library heavily inspired 10 | by the great R library [ggplot2](https://ggplot2.tidyverse.org). 11 | 12 | This will be kept rather brief, but we will discuss the philosophy of the syntax, look 13 | at a reasonably complex plotting example that we deconstruct and finish off by (hopefully) 14 | coming to the conclusion that the ggplot-like syntax is rather elegant. 15 | 16 | For this tutorial you should have read the data wrangling introduction to `Datamancer` or 17 | know about data frames and have seen the `Datamancer` formula macro `f{}`. 18 | 19 | ## On philosophy and graphics 20 | 21 | Similar to most areas of life touched by more than a few people who seemingly all have 22 | their own ideas about the right way to do things, plotting libraries come in different 23 | shapes and forms. They differ in their output formats, choice of colors and style, and, 24 | of most importance for us here, in their API / the programming syntax used. 25 | 26 | Most plotting libraries are either focused on object orientation 27 | (your commands return some objects for you to modify to your needs) or on a generally imperative 28 | style (call this function `plotFoo` for plot style A, that function `plotBar` for style B, 29 | etc.), and often some combination of these two.
30 | 31 | `ggplot2` and as a result `ggplotnim` follow a declarative style that builds up a plot 32 | from a single command by combining multiple different layers as a chain of commands. 33 | 34 | This is because `ggplot2` is an implementation of the so called "grammar of graphics". 35 | It's the NixOS of plotting libraries. Tell it what you want and it gets it done for you, 36 | as long as you speak its language. 37 | 38 | ## A motivating example 39 | 40 | Let's now consider a somewhat complicated plotting example. Using that we will look at why 41 | it is called a *grammar* of graphics. 42 | """ 43 | nbCodeInBlock: 44 | ## ignore the dummy `df` here. This is to be able to compile the code (we throw away 45 | ## the `ggplot` result as we don't call `ggsave`) 46 | let df = toDf({"Energy" : @[1], "Counts" : @[2], "Type" : @["background"]}) 47 | discard ggplot(df, aes("Energy", "Counts", fill = "Type", color = "Type")) + 48 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 49 | geom_point(binPosition = "center") + 50 | geom_errorbar(data = df.filter(f{`Type` == "background"}), 51 | aes = aes(yMin = f{max(`Counts` - 1.0, 0.0)}, yMax = f{`Counts` + 1.0}), 52 | binPosition = "center") + 53 | xlab("Energy [keV]") + ylab("#") + 54 | ggtitle("A multi-layer plot of a histogram and scatter plot with error bars") 55 | nbText: """ 56 | It may seem overwhelming. But it's actually simple and can be read from top to bottom. 57 | In words all this says is: 58 | 59 | "Create a plot from the input data frame `df` using column 'Energy' for the x axis, 'Counts' 60 | for the y axis and color the data (both outline `color` and fill color `fill`) based on the 61 | discrete entries of column 'Type'. With it draw: 62 | - a histogram without statistical computations (`stat = "identity"`, i.e. 
don't *compute* a histogram 63 | but use the data as a continuous bar plot), draw them in identity position (where the data says, 64 | no stacking of bars), add some alpha to the color and draw it as an outline. 65 | - a scatter plot in the center positions of each bin (`binPosition = "center"`), as the data 66 | contains bin edges. 67 | - error bars for all data of type 'background' (`data = df.filter(…)`), where the error bars range 68 | from `yMin` to `yMax` for all points, also in center position. 69 | 70 | Finally, customize x (`xlab`) and y (`ylab`) labels and add a title (`ggtitle`)." 71 | 72 | The only thing we left out is the `ggsave` call, as we only have a dummy data frame here. We 73 | will now walk through the basic building blocks of every plot and then look at the above as 74 | an actual plot. After reading the next part, looking at the plot above again should make it seem 75 | less dense already. 76 | 77 | ## The 3 (or 4) basic building blocks of a "ggplot" plot 78 | 79 | There are 3 (in some respect 4) major pieces that make up the basic syntax of *every single* 80 | plot created by `ggplot2` or `ggplotnim`. We will quickly go through these now. Keep 81 | in mind that every option that might be automatically deduced can always be overridden. 82 | 83 | ### Input data 84 | 85 | The zeroth piece (hence maybe 4) of a ggplot plot is the input data. It *always* comes 86 | in the form of a `DataFrame` that contains the data to be plotted (or at least the data 87 | from which the thing to be plotted can be computed; more on that later). If not 88 | overridden manually, the columns that are to be plotted define the labels for 89 | the axes in the final plot. 90 | 91 | In addition the library will determine automatically (based on column types & heuristic 92 | rules) whether each column to be plotted is continuous or discrete. Continuity and discreteness 93 | are a major factor in the kinds of plots we may create (and how they are represented).
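As a minimal sketch of what such an input data frame might look like (the column names match the later example, the values here are made up):

```nim
import ggplotnim

## "Energy" and "Counts" hold numbers -> detected as continuous,
## "Type" holds strings -> detected as discrete
let df = toDf({ "Energy" : @[1.0, 2.0, 3.0],
                "Counts" : @[4.0, 0.5, 2.2],
                "Type"   : @["background", "candidates", "background"] })
```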
94 | 95 | So for the next sections let's say we have some input data frame `df`. 96 | 97 | ### The `ggplot` procedure 98 | 99 | The first proper piece of *every* plot is a call to the `ggplot` procedure. It has a rather 100 | simple signature (note: we drop 2 arguments here, as they are left over in the code for "historical" 101 | reasons, namely `numX/YTicks`): 102 | 103 | ```nim 104 | proc ggplot*(data: DataFrame, aes: Aesthetics = aes(), …): GgPlot = 105 | ``` 106 | 107 | The first argument is the aforementioned input `DataFrame`. With our data frame, we can 108 | write down the first piece of every plot we will create: 109 | 110 | ```nim 111 | ggplot(df, …) 112 | ``` 113 | 114 | Simple enough. This doesn't do anything interesting yet. That's what the `aes` is for. 115 | 116 | ### `aes` - Aesthetics 117 | 118 | The `aes` argument of the `ggplot` procedure is the first deciding piece of our plotting 119 | call. It will essentially determine *what* we wish to plot and to some extent *how* we 120 | want to plot it. 121 | 122 | "Aesthetics" describe which 123 | data to use for what visible purpose. This might sound abstract, but will become clear 124 | in a few seconds. 125 | 126 | For the simplest cases (a scatter or line plot, a histogram, ...) we simply hand one (or multiple) 127 | column(s) to draw. Whether a column contains discrete or continuous data decides 128 | how the axis (or additional scale) will be laid out. 129 | 130 | To construct such an `Aesthetic` argument the `aes` macro is used. While it is a macro, it 131 | behaves like a regular procedure and can take the following arguments: 132 | 133 | - `x` 134 | - `y` 135 | - `color` 136 | - `fill` 137 | - `shape` 138 | - `size` 139 | - `xmin` 140 | - `xmax` 141 | - `ymin` 142 | - `ymax` 143 | - `width` 144 | - `height` 145 | - `text` 146 | - `weight` 147 | - `yridges` 148 | 149 | quite the list!
150 | 151 | Taking a closer look at the kind of arguments gives us maybe an inkling of what it's all about. 152 | The argument either maps to a physical axis in the plot (x, y), a "style"-like thing (color, 153 | fill, shape, size) or some more "descriptive" thing (e.g. for sizes x/yMin/Max, width, height), 154 | and finally some slightly "special" ones (text, weight, yridges). 155 | 156 | What each of these means for the final plot (again) depends on the data being discrete or continuous. 157 | 158 | As an example: 159 | - Discrete, each discrete value: 160 | - x and y: has one tick along x or y 161 | - color: has one color 162 | - shape: has one shape 163 | - size: has one size 164 | - Continuous, each value: 165 | - x and y: map to a continuous range between min and max values 166 | - color and fill: has a color picked from a continuous color range 167 | - size: has a size picked from a continuous range between smallest and largest size 168 | - shape: not supported, there are no "continuous shapes" 169 | 170 | (for the other aesthetics, too, only either discrete or continuous input makes sense. For instance "text" is 171 | always a discrete input, it's used to draw text onto a plot. yridges is to create a discrete ridgeline 172 | plot, etc.) 173 | 174 | How these are finally applied still depends on what comes later in the plotting syntax. But in principle 175 | the mapping to more specific things to be drawn is natural. For a point plot the size determines the point 176 | size and the color the point color. For a line it's line width and color, and so on. 177 | 178 | This part of the ggplot construction might be the most "vague" at first. But with it we can 179 | now continue our construction. Assume our data frame `df` has columns "Energy" and "Counts" (continuous), 180 | "Type" (discrete). 181 | 182 | ```nim 183 | ggplot(df, aes("Energy", "Counts", fill = "Type", color = "Type")) 184 | ``` 185 | 186 | In a sense this has described a coordinate system for our plot.
From the continuous / discrete columns 187 | we can determine the data ranges / classes for each "axis". Every aesthetic can 188 | be considered an "axis" here. For example a scatter plot of `x` and `y` values that is also classified 189 | by color using discrete column `A` and by shape using discrete column `B` is technically a 4 190 | dimensional representation. 191 | 192 | #### Formulas as `aes` arguments 193 | 194 | If you paid close attention to the plot example above, you will have noticed that for `yMin` and `yMax` 195 | we did not actually hand a column, but rather a `ggplotnim` [formula](https://scinim.github.io/getting-started/basics/data_wrangling.html). This is the main reason `aes` is a macro. 196 | 197 | You can hand *any* formula that references local variables or columns, or simply assign 198 | constant values (`aes(width = 5)` is perfectly valid). `ggplotnim` will compute the resulting values 199 | for you automatically before plotting. 200 | 201 | To summarize, you can use one of the following three things as values to `aes` arguments: 202 | - a string literal referring to a column 203 | - a formula computing some constant value or some operation using data frame columns 204 | - a constant (non string) value that can be stored in a data frame 205 | 206 | For formulas and constant values the corresponding absolute value will be computed for each 207 | data frame entry to be plotted. 208 | 209 | ### `geoms` - Geometric shapes to fill a plot 210 | 211 | Input data and aesthetics of course are not enough to actually draw a plot. So far we have only 212 | stated what part of the data to use and added a discrete classification by one column (fill 213 | and color the "Type" column). 214 | 215 | This is what all available `geom_*` procedures are for. They return `Geom` variant objects that mainly 216 | just store their kind and possibly some specific information required to draw them.
217 | 218 | The (currently) implemented geoms are as follows (with the required aesthetics listed): 219 | - `geom_point`: draw points for each `x`/`y` 220 | - `geom_line`: draw a line through all `x`/`y` 221 | - `geom_errorbar`: draw error bars from `xMin` to `xMax` or `yMin` to `yMax` at `x`/`y` 222 | - `geom_linerange`: draw lines from `xMin` to `xMax` or `yMin` to `yMax` 223 | - `geom_bar`: draw a *discrete* bar plot using the occurrences (default `stat = "count"`) of each 224 | discrete value in `x` or the number of counts indicated in `y` (`stat = "identity"`) 225 | - `geom_histogram`: draw a *continuous* bar plot computing a histogram from continuous variable `x` 226 | (default `stat = "bin"`) or draw continuous bars starting at `x` and the number of entries 227 | indicated in `y` (`stat = "identity"`). 228 | - `geom_freqpoly`: same as `geom_histogram`, but connect bin centers by lines instead of drawing bars 229 | - `geom_tile`: draw discrete tiles at `x`/`y` (default position bottom left) with width `width` and height 230 | `height` each. Tiles don't need to touch. 231 | - `geom_raster`: draw fully connected tiles at `x`/`y` of `width` and `height`. `width` and `height` must 232 | be constant! 233 | - `geom_text`: draw text at `x`/`y` containing `text` (the `text` aesthetic) 234 | 235 | Here we stated mainly the *typical* (or default) use cases. All geoms take all arguments. That means 236 | you can also draw a histogram using points by applying the `stat = "bin"` argument. The difference 237 | is just in the defaults! Or in case of a `geom_histogram` call you can indicate that the `binPosition` 238 | should be `"center"` instead of the default `"left"` to have `x` indicate the bin centers. 239 | 240 | The possibilities are almost endless. You can combine any geom with (almost) any option and 241 | it *should just work* (few exceptions exist, e.g. `geom_raster` only draws fixed size tiles for performance). 
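As a minimal sketch of that "only the defaults differ" idea, drawing the *binned* representation of some data using points instead of bars (untested; the output file name is hypothetical, and we rely on `geom_point` accepting the `stat` argument as described above):

```nim
import ggplotnim, random, sequtils

randomize(42)
let df = toDf({ "x" : toSeq(0 ..< 1000).mapIt(rand(10.0)) })
## ask `geom_point` to first *compute* a histogram (`stat = "bin"`)
## and then draw the bin counts as points rather than bars
ggplot(df, aes("x")) +
  geom_point(stat = "bin") +
  ggsave("images/histogram_as_points.png") ## hypothetical output path
```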
242 | 243 | One final thing to mention: the `geom_*` procedures *also* take `data` and `aes` 244 | arguments. This means one can override the data or the aesthetics for a single geom! 245 | 246 | #### Applying geom layers to build the initial plot 247 | 248 | We now need to apply the input data and selection of columns to things we can actually draw. 249 | 250 | This is where the layering structure of `ggplot` actually becomes apparent, because from here 251 | we will list all kinds of `geoms` to draw. The order we list them in directly determines 252 | the order they are drawn in. 253 | 254 | Step by step we will now add layer by layer and look at what happens with each to reproduce 255 | the plot talked about in the beginning. For that purpose we will generate a data frame that we 256 | will use in all following snippets. It will contain 3 columns: 257 | - "Energy": a column covering the range (0, 10) with 25 entries, repeated twice 258 | - "Counts": a column of twice 25 random entries between 0 and 10. The first 25 elements 259 | are drawn as floats (to get fractional values) and the second 25 entries will be random 260 | integer numbers 261 | - "Type": simply a column that designates the first 25 rows as "background" and the latter 262 | 25 as "candidates" 263 | 264 | Our construction in the following is a bit artificial of course.
265 | """ 266 | nbCode: 267 | import ggplotnim, random, sequtils 268 | randomize(42) 269 | let df = toDf({ "Energy" : cycle(linspace(0.0, 10.0, 25).toRawSeq, 2), 270 | "Counts" : concat(toSeq(0 ..< 25).mapIt(rand(10.0)), 271 | toSeq(0 ..< 25).mapIt(rand(10).float)), 272 | "Type" : concat(newSeqWith(25, "background"), 273 | newSeqWith(25, "candidates")) }) 274 | echo "Input data frame: " 275 | echo "Head(10): ", df.head(10).pretty(10) 276 | echo "Tail(10): ", df.tail(10).pretty(10) 277 | nbText: """ 278 | What we want to achieve as a final plot is the following, where the explanations 279 | are mainly due to the original motivation of where the plot example is taken from: 280 | - a histogram for each "Type", drawn with one color each and a bit transparent so they 281 | are visible where they overlap. Let each bin content correspond to some data point 282 | at that energy. 283 | - also plot the actual data points on top of the bins. For the "background"-like data 284 | we have fractional values, as its values are normalized to the "candidates" (simply 285 | by scaling from some hypothetical time for background / time for candidate data). 286 | - For the background data we want error bars. They represent our uncertainty of the 287 | background hypothesis. 288 | - the candidates are just counts we measured. They don't have inherent uncertainties 289 | (from a frequentist perspective we have to repeat the experiment many times and *then* 290 | we can write down some variance in our candidates) 291 | 292 | #### Building layer 1 - `geom_histogram` 293 | 294 | So, let's start with drawing a histogram of "Energy" and "Counts": 295 | """ 296 | nbCodeInBlock: 297 | ggplot(df, aes("Energy", "Counts")) + 298 | geom_histogram() + 299 | ggsave("images/multi_layer_histogram_0.png") 300 | nbImage("images/multi_layer_histogram_0.png") 301 | nbText: """ 302 | So, uhm. This looks rather broken! Or at least not what we want. What's going on? 303 | We're asking for a histogram!
By default this means `ggplotnim` will *compute* the 304 | histogram based on the `x` aesthetic. (Note: it should at least print a warning 305 | message if a `y` aesthetic is given that the user probably wants identity stats!). 306 | Instead our data is *already* binned. We need the `stat = "identity"` option to 307 | the `geom_histogram` call: 308 | """ 309 | nbCodeInBlock: 310 | ggplot(df, aes("Energy", "Counts")) + 311 | geom_histogram(stat = "identity") + 312 | ggsave("images/multi_layer_histogram_1.png") 313 | nbImage("images/multi_layer_histogram_1.png") 314 | nbText: """ 315 | This looks a bit better. At least we have something that sort of resembles our 316 | input data! But what's that wide gray bar spanning the whole `x` range with a 317 | height of roughly 3? 318 | 319 | Our data frame covers the `x` range *twice*. Once for our "background" 320 | dataset, indices 0 to 24, and then again for our "candidates" dataset, 321 | indices 25 to 49. At the end of the first part (index 24) 322 | our "Energy" column is `10.0`. From there it jumps back to `0.0` (index 323 | 25) in the next bin. This leads to a full bin that accidentally covers 324 | the full range from 0 to 10 (with a "negative" bin width if we compute 325 | it bin to bin). Let's check that assumption by printing values between 326 | index 24 and 26 from our data frame: 327 | """ 328 | nbCodeInBlock: 329 | echo df[24 .. 26] 330 | nbText: """ 331 | As we can see index 24 with a value of `2.746` is used for a bin with bin width 10. 332 | 333 | To get closer to the plot we want, we will perform classification by a discrete 334 | variable. Let's `color` by the "Type" column: 335 | """ 336 | nbCodeInBlock: 337 | ggplot(df, aes("Energy", "Counts", color = "Type")) + 338 | geom_histogram(stat = "identity") + 339 | ggsave("images/multi_layer_histogram_2.png") 340 | nbImage("images/multi_layer_histogram_2.png") 341 | nbText: """ 342 | Ohh, interesting.
See how we have an automatic legend based on the 343 | two classes found in column "Type". 344 | 345 | The entries seem a bit high right now. We only sampled up to a total of 346 | 10. And didn't we say we want to have classification by `color`? For bars 347 | `color` refers to the *outline* of a bar. We need to add a `fill` to get the 348 | bars into a fully colored object. 349 | """ 350 | nbCodeInBlock: 351 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 352 | geom_histogram(stat = "identity") + 353 | ggsave("images/multi_layer_histogram_3.png") 354 | nbImage("images/multi_layer_histogram_3.png") 355 | nbText: """ 356 | Aha! Apparently both classes are being *stacked* on top of one 357 | another. This is the default behavior for classified histograms so 358 | that all the data is visible. Without transparency we would hide data 359 | otherwise. 360 | 361 | To change this behavior to the one we want we will apply `position = "identity"`: 362 | """ 363 | nbCodeInBlock: 364 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 365 | geom_histogram(stat = "identity", position = "identity") + 366 | ggsave("images/multi_layer_histogram_4.png") 367 | nbImage("images/multi_layer_histogram_4.png") 368 | nbText: """ 369 | This is already looking somewhat reasonable, barring the fact that we now 370 | have the exact problem stacking is supposed to fix. One histogram overlaps 371 | the other. We can solve that by applying a 50% alpha channel: 372 | """ 373 | nbCodeInBlock: 374 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 375 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5) + 376 | ggsave("images/multi_layer_histogram_5.png") 377 | nbImage("images/multi_layer_histogram_5.png") 378 | nbText: """ 379 | 380 | The plot we're seeing is quite pretty already. The only small 381 | annoyance is that the outline is still sticking out between all bars, 382 | which makes it more busy than it should be.
Let's fix that by drawing 383 | the histograms using *outlines* instead of individual bars: """ 384 | nbCodeInBlock: 385 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 386 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 387 | ggsave("images/multi_layer_histogram_6.png") 388 | nbImage("images/multi_layer_histogram_6.png") 389 | nbText: """ 390 | Nice, first layer done! This is the result we want to achieve for the *histogram* part 391 | of our plot. As we can see, we've added *one* geom to the call chain. One layer. 392 | 393 | #### Building layer 2 - `geom_point` 394 | 395 | Next up, let's plot some points for the data to better highlight where our actual 396 | data lies (and to lay the foundation for our error bars). This is as simple as adding 397 | a single `geom_point` call into the chain: 398 | """ 399 | nbCodeInBlock: 400 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 401 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 402 | geom_point() + 403 | ggsave("images/multi_layer_histogram_7.png") 404 | nbImage("images/multi_layer_histogram_7.png") 405 | nbText: """ 406 | But wait. Why are our points on the left side of each bar? Because we defined our 407 | `Energy` column to contain *bin edges*, and different geoms use different 408 | defaults for their arguments. A histogram with identity statistics essentially interprets 409 | the `x` axis data as bin edges, whereas a point plot of course uses the `x` values as the 410 | location where to draw the points. 411 | 412 | However, the grammar of graphics allows us to change that as well.
Let's tell `geom_point` 413 | that the data points are *bin centers*: 414 | """ 415 | nbCodeInBlock: 416 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 417 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 418 | geom_point(binPosition = "center") + 419 | ggsave("images/multi_layer_histogram_8.png") 420 | nbImage("images/multi_layer_histogram_8.png") 421 | nbText: """ 422 | Perfect, now our points are right where they belong. This concludes layer 2. 423 | 424 | #### Building layer 3 - `geom_errorbar` 425 | 426 | This leaves us with a single, final layer. That of the error bars. 427 | Due to a bug present right now, we cannot call `geom_errorbar` without min / max 428 | aesthetic args (which should in practice raise an exception or 429 | draw nothing, because without limits error bars make no sense). 430 | 431 | So, let's assume we want (arbitrary) error bars that are ± 1 at each 432 | point. This can be achieved by assigning a formula to the `yMin` and 433 | `yMax` aesthetic in which we describe this relationship. Start with `yMin`: 434 | """ 435 | nbCodeInBlock: 436 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 437 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 438 | geom_point(binPosition = "center") + 439 | geom_errorbar(aes = aes(yMin = f{`Counts` - 1.0})) + 440 | ggsave("images/multi_layer_histogram_9.png") 441 | nbImage("images/multi_layer_histogram_9.png") 442 | nbText: """ 443 | 444 | Looking closely, we see that the error bars in some bins go to 445 | negative values! That's not acceptable for us. Error bars on counts 446 | should stop at 0, because we cannot measure negative counts! 447 | 448 | We do this by modifying the formula for `yMin` to simply take the maximum value 449 | in each case between the computed difference and 0. 450 | 451 | And in addition they are also drawn on the left side of each bin.
Let's fix 452 | both the range of the bar as well as its placement: 453 | """ 454 | nbCodeInBlock: 455 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 456 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 457 | geom_point(binPosition = "center") + 458 | geom_errorbar(binPosition = "center", aes = aes(yMin = f{max(`Counts` - 1.0, 0.0)})) + 459 | ggsave("images/multi_layer_histogram_11.png") 460 | nbImage("images/multi_layer_histogram_11.png") 461 | nbText: """ 462 | Much better. Of course we still only have error bars in the negative direction and lines 463 | down to zero (`yMax` is unset, so default value 0). On to add positive bars then: 464 | """ 465 | nbCodeInBlock: 466 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 467 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 468 | geom_point(binPosition = "center") + 469 | geom_errorbar(binPosition = "center", aes = aes(yMin = f{max(`Counts` - 1.0, 0.0)}, yMax = f{`Counts` + 1.0})) + 470 | ggsave("images/multi_layer_histogram_12.png") 471 | nbImage("images/multi_layer_histogram_12.png") 472 | nbText: """ 473 | Sweet! But wait, we still have error bars for the "candidates" dataset. This is where 474 | the fact that individual geoms can receive their own data frame comes in. We'll simply 475 | hand `geom_errorbar` the input data frame filtered to only "background" rows. 
This way 476 | it will only have that data to plot and we should end up without error bars on the 477 | "candidates" data: 478 | """ 479 | nbCodeInBlock: 480 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 481 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 482 | geom_point(binPosition = "center") + 483 | geom_errorbar(binPosition = "center", data = df.filter(f{`Type` == "background"}), 484 | aes = aes(yMin = f{max(`Counts` - 1.0, 0.0)}, yMax = f{`Counts` + 1.0})) + 485 | ggsave("images/multi_layer_histogram_13.png") 486 | nbImage("images/multi_layer_histogram_13.png") 487 | nbText: """ 488 | Perfect! Let's round it off by modifying the `x` and `y` labels and add a nice 489 | title on top: 490 | """ 491 | nbCodeInBlock: 492 | ggplot(df, aes("Energy", "Counts", color = "Type", fill = "Type")) + 493 | geom_histogram(stat = "identity", position = "identity", alpha = 0.5, hdKind = hdOutline) + 494 | geom_point(binPosition = "center") + 495 | geom_errorbar(binPosition = "center", data = df.filter(f{`Type` == "background"}), 496 | aes = aes(yMin = f{max(`Counts` - 1.0, 0.0)}, yMax = f{`Counts` + 1.0})) + 497 | xlab("Energy [keV]") + ylab("#") + 498 | ggtitle("A multi-layer plot of a histogram and scatter plot with error bars") + 499 | ggsave("images/multi_layer_histogram.png") 500 | nbImage("images/multi_layer_histogram.png") 501 | nbText: """ 502 | 503 | And here we are. We've rebuilt the whole plot from the beginning. Now you should have a 504 | good idea of why this plot looks the way it does. 505 | 506 | The great thing is that this is the whole workflow of ggplot. You won't have to search 507 | through weird N levels deep inheritances of objects (looking at you, matplotlib!) to 508 | figure out how to do this or that. Every other feature `ggplotnim` provides is 509 | also handled in the same way. We just replace a few geoms or arguments or maybe add 510 | another command. 
That's all there is to the grammar of graphics. Simple, but powerful. 511 | 512 | With an understanding of the grammar of graphics, one can then essentially plot everything 513 | that can be mapped to geometric objects and data, even for example 514 | [a periodic table](https://github.com/Vindaar/ggplotnim/blob/master/recipes.org#fun-with-elements). 515 | 516 | ## A gallery of plotting examples 517 | 518 | For a large variety of actual plotting example snippets, check out the `ggplotnim` recipe section here: 519 | 520 | [Recipes](https://github.com/Vindaar/ggplotnim/blob/master/recipes.org) 521 | 522 | Thanks for reading! :) 523 | """ 524 | 525 | nbSave 526 | --------------------------------------------------------------------------------