├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .gitignore ├── LICENSE ├── README.md ├── apt.txt ├── course ├── data │ ├── collections_dataset.root │ └── example_file.root ├── exercises │ ├── core │ │ ├── fitting-exercise-data.txt │ │ ├── fitting-exercise.ipynb │ │ ├── rdataframe-basics.ipynb │ │ └── rdataframe-collections.ipynb │ └── extra │ │ ├── 00_C++_Interpreter │ │ ├── myMacro.C │ │ └── readme.md │ │ ├── 01_Histograms_Graphs_Functions │ │ ├── CreateAHistogram.ipynb │ │ ├── graphDraw.ipynb │ │ ├── readme.md │ │ └── solutions │ │ │ ├── CreateAHistogramCpp_Solution.ipynb │ │ │ ├── CreateAHistogram_Solution.ipynb │ │ │ ├── GraphMacro.py │ │ │ ├── HistogramMacro.py │ │ │ ├── SimpleFunction.C │ │ │ ├── SimpleFunction.ipynb │ │ │ ├── SimpleGraph.C │ │ │ ├── SimpleGraph.ipynb │ │ │ ├── SimpleHistogram.C │ │ │ ├── SimpleHistogram.ipynb │ │ │ └── graphDraw_Solution.ipynb │ │ ├── 02_Fitting │ │ ├── macros │ │ │ ├── correlatedParameters.C │ │ │ ├── firstFit.C │ │ │ └── fitPanel.C │ │ ├── notebooks │ │ │ ├── CentralLimitTheorem.ipynb │ │ │ ├── GausFit.ipynb │ │ │ ├── GausFit_2.ipynb │ │ │ ├── Hgg.txt │ │ │ └── HiggsBinFit.ipynb │ │ ├── readme.md │ │ └── solutions │ │ │ ├── CentralLimitTheorem_Solution.ipynb │ │ │ ├── GausFit_2_Solution.ipynb │ │ │ └── firstFit_Solution.C │ │ ├── 03_Working_With_Files │ │ ├── WritingOnFilesExercise.ipynb │ │ ├── histos.root │ │ ├── readme.md │ │ └── solutions │ │ │ └── WritingOnFiles_Solution.ipynb │ │ ├── 04_RDataFrame │ │ ├── rdataframe-dimuon.ipynb │ │ ├── readme.md │ │ └── solutions │ │ │ ├── rdataframe-dimuon.cpp │ │ │ ├── rdataframe-dimuon.py │ │ │ └── solution-rdataframe-dimuon.ipynb │ │ └── 05_Graphics │ │ ├── readme.md │ │ └── solutions │ │ └── GoodPlot.C ├── images │ ├── DistRDF_architecture.png │ ├── binder1.png │ ├── cern-logo.png │ ├── dataset.png │ ├── dimuonSpectrum.png │ ├── examplehist_df106_HiggsToFourLeptons.png │ ├── examplehisto.png │ ├── rdf_1.png │ ├── root1.png │ ├── swan1.png │ ├── swan2.png │ ├── 
swan3.png │ ├── swan4.png │ ├── swan5.png │ ├── tfile1.png │ └── tfile2.png └── notebooks │ ├── core │ ├── 00-root-intro.ipynb │ ├── 01-histograms-and-graphs.ipynb │ ├── 02-tfile-read-write-ttree.ipynb │ ├── 03-rdataframe-basics.ipynb │ ├── 04-rdataframe-collections.ipynb │ ├── 05-rdataframe-features.ipynb │ └── 06-rdataframe-advanced.ipynb │ └── extra │ ├── extra-00-setup.ipynb │ ├── extra-01-jupyter.ipynb │ ├── extra-02-root-python-cpp.ipynb │ └── extra-03-root-in-jupyter.ipynb └── environment.yml /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rootproject/root:6.36.00-ubuntu25.04 2 | 3 | RUN apt-get update && apt-get install -y git python3-pip python3-venv 4 | 5 | ENV VIRTUAL_ENV=/opt/venv 6 | RUN python3 -m venv $VIRTUAL_ENV 7 | ENV PATH="$VIRTUAL_ENV/bin:$PATH" 8 | 9 | RUN python3 -m pip install --upgrade wheel numpy jupyter metakernel dask distributed pyspark matplotlib mplhep 10 | 11 | # Make sure the filesystem can detect the ROOT C++ Jupyter kernel 12 | RUN mkdir -p ~/.local/share/jupyter/kernels && cp -r /opt/root/etc/notebook/kernels/root ~/.local/share/jupyter/kernels 13 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "dockerfile": "Dockerfile" 4 | }, 5 | "hostRequirements": { 6 | "cpus": 2 7 | }, 8 | "customizations": { 9 | "vscode": { 10 | "extensions": [ 11 | "ms-toolsai.jupyter", 12 | "ms-python.python", 13 | "albertopdrf.root-file-viewer", 14 | "donjayamanne.vscode-default-python-kernel" 15 | ], 16 | "settings": { 17 | "python.defaultInterpreterPath": "/opt/venv/bin/python" 18 | } 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Misc 2 | .DS_Store 3 | .vscode 
4 | *.npz 5 | *.pdf 6 | *.png 7 | 8 | !*/images/*.png 9 | 10 | # Python 11 | __pycache__ 12 | .ipynb_checkpoints 13 | 14 | # C++ 15 | *.a 16 | *.d 17 | *.o 18 | *.exe 19 | *.gch 20 | *.lib 21 | *.out 22 | *.pch 23 | *.pcm 24 | *.so 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 
30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). 
To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. 
Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. 
Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. 
You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. 
identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. 
if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. 
WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 
353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public licenses. 379 | Notwithstanding, Creative Commons may elect to apply one of its public 380 | licenses to material it publishes and in those instances will be 381 | considered the “Licensor.” The text of the Creative Commons public 382 | licenses is dedicated to the public domain under the CC0 Public Domain 383 | Dedication. 
Except for the limited purpose of indicating that material 384 | is shared under a Creative Commons public license or as otherwise 385 | permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the public 393 | licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 396 | 397 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ROOT course for students 2 | [![SWAN](https://swan.web.cern.ch/sites/swan.web.cern.ch/files/pictures/open_in_swan.svg)](https://cern.ch/swanserver/cgi-bin/go?projurl=https://github.com/root-project/student-course.git) 3 | [![Github Codespace](https://img.shields.io/badge/open-GH_Codespaces-blue?logo=github)](https://codespaces.new/root-project/student-course?quickstart=1) 4 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/root-project/student-course/main) 5 | 6 | This course targets young scientists and engineers with little or no experience 7 | of ROOT. The aim of the course is to reach a solid understanding of the tools 8 | available to conduct data analysis, processing and visualization. 9 | 10 | In the `course/notebooks/core` directory you will find an overview of ROOT 11 | and six lessons which explain the most fundamental features and functionalities of ROOT. 
12 | In the `course/exercises/core` directory you will find the exercises that you should attempt 13 | after going through the notebooks with the course material. The course is written in 14 | Python and does not require much C++ knowledge. 15 | 16 | The in-person course based on the material in this repository was recorded during the summer of 2024. The recording is available [here](https://videos.cern.ch/record/2300516). We encourage you to watch the video and follow along with the notebooks and exercises. 17 | 18 | If you want to know a bit more and attempt a few more exercises, including those written 19 | in C++, you can first go through some [extra slides](https://docs.google.com/presentation/d/1iNSwzuhmhJAmU3c1_0SfYgqbr-N7BKILpcAJjojXkSg/edit?usp=sharing) and then go to the `course/exercises/extra` 20 | section. 21 | 22 | Enjoy! And in case of any issues, don't hesitate to ask on our [forum](https://root-forum.cern.ch). 23 | -------------------------------------------------------------------------------- /apt.txt: -------------------------------------------------------------------------------- 1 | openjdk-11-jre-headless 2 | -------------------------------------------------------------------------------- /course/data/collections_dataset.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/data/collections_dataset.root -------------------------------------------------------------------------------- /course/data/example_file.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/data/example_file.root -------------------------------------------------------------------------------- /course/exercises/core/rdataframe-basics.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise: RDataFrame basics" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The file `example_file.root` in the `data` directory contains a `TTree` dataset (called `\"dataset\"`) with two scalar columns `a` and `b`.
\n", 15 | "There is a normal distribution hidden in there, but to display it you have to plot the natural logarithm of `a` only for entries in which `b <= 0.5`.\n", 16 | "\n", 17 | "### Useful links\n", 18 | "\n", 19 | "- [RDataFrame class reference](https://root.cern/doc/master/classROOT_1_1RDataFrame.html)\n", 20 | "- [RDataFrame tutorials](https://root.cern.ch/doc/master/group__tutorial__dataframe.html)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import ROOT" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "
Click on this text to display the solution (but try yourself first!)\n", 37 | " \n", 38 | "
\n",
39 |     "%jsroot on\n",
40 |     "df = ROOT.RDataFrame(\"dataset\", \"../../data/example_file.root\")\n",
41 |     "h = df.Filter(\"b <= 0.5\").Define(\"loga\", \"TMath::Log(a)\").Histo1D((\"h\",\"A beautiful histo\", 30, -5, 5), \"loga\")\n",
42 |     "c = ROOT.TCanvas()\n",
43 |     "h.Draw()\n",
44 |     "c.Draw()\n",
45 |     "
\n", 46 | "
" 47 | ] 48 | } 49 | ], 50 | "metadata": { 51 | "kernelspec": { 52 | "display_name": "Python 3 (ipykernel)", 53 | "language": "python", 54 | "name": "python3" 55 | }, 56 | "language_info": { 57 | "codemirror_mode": { 58 | "name": "ipython", 59 | "version": 3 60 | }, 61 | "file_extension": ".py", 62 | "mimetype": "text/x-python", 63 | "name": "python", 64 | "nbconvert_exporter": "python", 65 | "pygments_lexer": "ipython3", 66 | "version": "3.9.6" 67 | } 68 | }, 69 | "nbformat": 4, 70 | "nbformat_minor": 4 71 | } 72 | -------------------------------------------------------------------------------- /course/exercises/core/rdataframe-collections.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "41ca9b9e", 6 | "metadata": {}, 7 | "source": [ 8 | "# Exercise: RDataFrame and collections" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "c5657ad2", 14 | "metadata": {}, 15 | "source": [ 16 | "The same dataset used in the previous exercise (`TTree` \"dataset\" in file `example_file.root`) contains two vector columns `vec1` and `vec2`.
\n", 17 | "The elements of `vec1` are distributed as a mixture of Gaussians, plus background noise.
\n", 18 | "To remove the background noise, select the elements of `vec1` for which the square of the corresponding elements in `vec2` is less than 0.1.\n", 19 | "\n", 20 | "Where are the peaks of this mixture of Gaussians?\n", 21 | "\n", 22 | "### Useful links\n", 23 | "\n", 24 | "- [RDataFrame: working with collections](https://root.cern/doc/master/classROOT_1_1RDataFrame.html#collections)\n", 25 | "- [RVec reference](https://root.cern/doc/master/classROOT_1_1VecOps_1_1RVec.html)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "4dea17f9", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import ROOT" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "32fad902", 41 | "metadata": {}, 42 | "source": [ 43 | "
Click on this text to display the solution (but try yourself first!)\n", 44 | " \n", 45 | "
\n",
46 |     "%jsroot on\n",
47 |     "df = ROOT.RDataFrame(\"dataset\", \"../../data/example_file.root\")\n",
48 |     "h = df.Define(\"v1_to_plot\", \"vec1[vec2*vec2 < 0.1]\").Histo1D(\"v1_to_plot\")\n",
49 |     "c = ROOT.TCanvas()\n",
50 |     "h.Draw()\n",
51 |     "c.Draw()\n",
52 |     "
\n", 53 | "
" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3 (ipykernel)", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.9.6" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 5 78 | } 79 | -------------------------------------------------------------------------------- /course/exercises/extra/00_C++_Interpreter/myMacro.C: -------------------------------------------------------------------------------- 1 | 2 | void myMacro () 3 | { 4 | // WRITE HERE YOUR OWN CODE 5 | // like std::cout << "Result is: ..." << std::endl; 6 | } 7 | -------------------------------------------------------------------------------- /course/exercises/extra/00_C++_Interpreter/readme.md: -------------------------------------------------------------------------------- 1 | # C++ Interpreter Learning Module Exercises 2 | Welcome to the exercises of the **C++ Interpreter** learning module! 3 | 4 | ## Start a ROOT session 5 | Carry out these simple tasks: 6 | - From the terminal, fire up ROOT 7 | - Verify it works as a calculator 8 | - List the files in /etc from within the ROOT prompt 9 | - Inspect the help 10 | - Quit 11 | 12 | ## Complete a simple ROOT macro 13 | Consider the file *myMacro.C*. It is a simple ROOT macro. In this exercise, you 14 | will fill in the code you interactively typed into the interpreter to verify the 15 | value of the geometric series. 
16 | Run the macro with ROOT in three ways: 17 | - Directly invoking ROOT as interpreter for it 18 | - Executing it from within the prompt 19 | - Loading it from the prompt and then invoking the *myMacro* function 20 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/CreateAHistogram.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Create a Histogram\n", 8 | "Create a histogram, fill it with random numbers, set its colour to blue, draw it.\n", 9 | "Can you:\n", 10 | "- Can you use the native Python random number generator for this?\n", 11 | "- Can you make your plot interactive using JSROOT?\n", 12 | "- Can you document what you did in markdown?" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "Welcome to JupyROOT 6.10/06\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "import ROOT" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": true 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "# Your Code Here" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 2", 47 | "language": "python", 48 | "name": "python2" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 2 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython2", 60 | "version": "2.7.13" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 2 65 | } 66 | -------------------------------------------------------------------------------- 
/course/exercises/extra/01_Histograms_Graphs_Functions/graphDraw.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Interactively Draw a Graph" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import ROOT\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "c = ROOT.TCanvas()" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "The simple graph" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "x = np.array([1,2,3,4,5], dtype=float)\n", 45 | "y = x*x\n", 46 | "g = ROOT.TGraph(n=x.size, x=x, y=y)\n", 47 | "g.Draw(\"APL\")\n", 48 | "c.Draw()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Change marker style, colour as well as line colour and thickness. Make the plot interactive. Re-draw the plot and interact with it!" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "# Here your code" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Now we set the title and the grid on the canvas." 
72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# Here your code" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "We will now add the symbol of the Calcium isotope" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# Here your code" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Redraw using a Y axis in log scale." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Here your code" 113 | ] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.11.9" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 2 137 | } 138 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/readme.md: -------------------------------------------------------------------------------- 1 | # Histograms, Graphs and Functions Exercises 2 | 3 | Welcome to the exercises of the **Histograms, Graphs and Functions** learning module! 4 | 5 | There are multiple ways of running ROOT - for example, you can use notebooks or you can create macros. Here you can find exercises and examples using both notebooks and macros. 
6 | 7 | ## Notebooks 8 | 9 | For the beginning you can try with the notebooks: 10 | 11 | - [CreateAHistogram](CreateAHistogram.ipynb) 12 | - [graphDraw](graphDraw.ipynb) 13 | 14 | If you are stuck on these exercises, take a look at the provided solutions in the `solutions` folder. For the `CreateAHistogram.ipynb` exercise, the solution is provided both as a python notebook and the C++ notebook for you to see and compare the differences. 15 | 16 | ## Python macros 17 | 18 | Now instead of using notebooks, try writing your own python macros. These are the two different exercises; in case you're stuck, the solutions are provided both as ROOT macros, but also as notebooks. 19 | 20 | ### Build a histogram 21 | 22 | In order to complete this exercise about ROOT histograms in PyROOT, please follow these steps: 23 | - Open the Python interpreter. 24 | - Import the ROOT module. 25 | - Create a histogram with 64 bins and an x-axis ranging from 0 to 16. 26 | - Fill it with random numbers distributed according to a linear function (*pol1*). 27 | - Make the line width a bit larger. 28 | - Draw it. 29 | 30 | You can find the solution [here](solutions/HistogramMacro.py). 31 | 32 | ### Build a graph 33 | 34 | In order to complete this exercise about ROOT graphs in PyROOT, please follow these steps: 35 | - Create a new Python module. 36 | - In the module, create a graph (*TGraph*). 37 | - Set its title to *My graph*, its X axis title to *myX* and Y axis title to *myY*. 38 | - Fill it with three points: (1,0), (2,3), (3,4). 39 | - Set a red full square marker. 40 | - Draw an orange line between points. 41 | 42 | You can find the solution [here](solutions/GraphMacro.py). 43 | 44 | ## Cpp macros 45 | 46 | The next step is writing your own cpp macros. You have 3 different exercises here, again, in case you're stuck, the solutions are provided both as ROOT macros, but also as notebooks.
47 | 48 | ### Build a histogram 49 | 50 | Create a macro which builds and draws a histogram with the following features: 51 | - The number of bins is 50 and the x axis ranges from 0 to 10. 52 | - It is filled with random numbers distributed according to an exponential distribution which has a rate = 0.5. 53 | Suggestion: see the [TRandom](https://root.cern.ch/doc/master/classTRandom.html) for generating random numbers or 54 | [TH1::FillRandom](https://root.cern.ch/doc/master/classTH1.html#random-numbers) 55 | - Its line width is thicker than the default one. 56 | 57 | You can find the solution as C++ macro [SimpleHistogram.C](solutions/SimpleHistogram.C) and [notebook](solutions/SimpleHistogram.ipynb). 58 | 59 | ### Build a graph 60 | Create a macro which builds and draws a graph with the following features: 61 | - The title of the plot is *My graph*. 62 | - The x and y axis have labels `my_{X}` and `my_{Y}` respectively. 63 | - It has three points with the following coordinates (1,0), (2,3), (3,4). 64 | - The marker is a full square. Its colour is red. 65 | - An orange line joins the points. 66 | 67 | You can find the solution as C++ macro [SimpleGraph.C](solutions/SimpleGraph.C) and as [notebook](solutions/SimpleGraph.ipynb). 68 | 69 | ### Build a function 70 | Create a macro that follows these steps: 71 | - Create a function with formula *cos(x)* and draw it. 72 | - Create another *cos(x)*, but scale the argument of the cosine by adding a parameter. 73 | - Set a value for the parameter. 74 | - Change the line color of the second function. 75 | - Draw the second function in the same canvas as the first one. 76 | 77 | You can find the solution as C++ macro [SimpleFunction.C](solutions/SimpleFunction.C) and as [notebook](solutions/SimpleFunction.ipynb). 
78 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/GraphMacro.py: -------------------------------------------------------------------------------- 1 | import ROOT 2 | import numpy as np 3 | 4 | # Create a graph with three points: (1,0), (2,3), (3,4) 5 | x = np.array([1,2,3], dtype=float) 6 | y = np.array([0,3,4], dtype=float) 7 | g = ROOT.TGraph(n=x.size, x=x, y=y) 8 | 9 | # Set its title to My graph, its X axis title to myX and Y axis title to myY 10 | g.SetTitle("My graph;myX;myY") 11 | 12 | # Set a red full square marker 13 | g.SetMarkerStyle(ROOT.kFullSquare) 14 | g.SetMarkerColor(ROOT.kRed) 15 | 16 | # Draw an orange line between points 17 | g.SetLineColor(ROOT.kOrange) 18 | 19 | # Draw the graph 20 | # "A": draw the axis 21 | # "P": draw the points 22 | # "L": draw graph as a simple line 23 | g.Draw("APL") 24 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/HistogramMacro.py: -------------------------------------------------------------------------------- 1 | # Import the ROOT module 2 | import ROOT 3 | 4 | # Create a histogram with 64 bins and an x-axis ranging from 0 to 16 5 | h = ROOT.TH1F("myPyHisto", "Productivity;Python Knowledge;Productivity", 64, 0, 16) 6 | 7 | # Fill it with random numbers distributed according to a linear function ("pol1") 8 | h.FillRandom("pol1") 9 | 10 | # Make its line width thicker than the default 11 | h.SetLineWidth(4) 12 | 13 | # Draw it!
14 | h.Draw() 15 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/SimpleFunction.C: -------------------------------------------------------------------------------- 1 | 2 | void SimpleFunction() 3 | { 4 | // Create a function with formula "cos(x)" and draw it 5 | auto f1 = new TF1("f1", "cos(x)", 0., 10.); 6 | f1->Draw(); 7 | 8 | // Create another function based on "cos(x)" with a parameter 9 | auto f2 = new TF1("f2", "cos(x/[0])", 0., 10.); 10 | f2->SetParameter(0, 2); 11 | f2->SetLineColor(kBlue); 12 | f2->Draw("Same"); 13 | } 14 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/SimpleFunction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example of a Function \n", 8 | "\n", 9 | "- Create a function with formula *cos(x)* and draw it.\n", 10 | "- Create another *cos(x)*, but scale the argument of the cosine by adding a parameter.\n", 11 | "- Set a value for the parameter.\n", 12 | "- Change the line color of the second function.\n", 13 | "- Draw the second function in the same canvas as the first one." 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "We now create the TF1 class with formula *cos(x)*" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "auto f1 = new TF1(\"f1\", \"cos(x)\", 0., 10.);" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "We draw now the function." 
37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "f1->Draw();\n", 46 | "gPad->Draw();" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "Create another function based on *cos(x)* with a parameter" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "auto f2 = new TF1(\"f2\", \"cos(x/[0])\", 0., 10.);" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "f2->SetParameter(0, 2);\n", 72 | "f2->SetLineColor(kBlue);" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "We draw the second function in the same Pad as the first one" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "f2->Draw(\"SAME\");\n", 89 | "gPad->Draw();" 90 | ] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "ROOT C++", 96 | "language": "c++", 97 | "name": "root" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": "text/x-c++src", 101 | "file_extension": ".C", 102 | "mimetype": " text/x-c++src", 103 | "name": "c++" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/SimpleGraph.C: -------------------------------------------------------------------------------- 1 | void SimpleGraph() 2 | { 3 | // Create the graph on the heap (with 'new') so that it survives the end of the macro 4 | auto g = new TGraph(); 5 | 6 | // Set the points 7 | std::initializer_list<std::pair<double, double>> points = {{1,0}, {2,3}, {3,4}}; 8 | int i = 0; 9 | for(auto &&point : points) { 10 | g->SetPoint(i++, 
point.first, point.second); 11 | } 12 | 13 | // Set the style 14 | g->SetTitle("My graph;my_{X};my_{Y}"); 15 | g->SetLineColor(kOrange); 16 | g->SetLineWidth(2); 17 | g->SetMarkerStyle(kFullSquare); 18 | g->SetMarkerColor(kRed); 19 | 20 | // Draw! 21 | g->Draw("APL"); 22 | } 23 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/SimpleGraph.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Building a Graph \n", 8 | "\n", 9 | "Create a graph with the following features: \n", 10 | "- The title of the plot is My graph.\n", 11 | "- The x and y axis have labels my_{X} and my_{Y} respectively.\n", 12 | "- It has three points with the following coordinates (1,0), (2,3), (3,4).\n", 13 | "- The marker is a full square. Its colour is red.\n", 14 | "- An orange line joins the points."
15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "We now create the TGraph class " 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "auto g = new TGraph();" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "We fill now the Graph with the points" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "double x[] = {1,2,3};\n", 47 | "double y[] = {0,3,4};" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "for (int i = 0; i < 3; i++)\n", 57 | " g->SetPoint(i,x[i],y[i]);" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Before drawing it we set its graphic style" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "g->SetTitle(\"My graph;my_{X};my_{Y}\");\n", 74 | "g->SetLineColor(kOrange);\n", 75 | "g->SetLineWidth(2);\n", 76 | "g->SetMarkerStyle(kFullSquare);\n", 77 | "g->SetMarkerColor(kRed);" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "We draw the Graph in a ROOT canvas using options: \n", 85 | "- `\"A\"` to draw the axis\n", 86 | "- `\"P\"` to draw the points\n", 87 | "- `\"L\"` to connect the points with a line\n", 88 | "\n", 89 | "See [TGraphPainter](https://root.cern.ch/doc/master/classTGraphPainter.html) for all the drawing options" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "auto c = new TCanvas();\n", 99 | "g->Draw(\"APL\");\n", 100 | "c->Draw();" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": 
{}, 106 | "source": [ 107 | "Change marker style, colour as well as line colour and thickness. Make the plot interactive. Re-draw the plot and interact with it!" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "g->SetMarkerStyle(kFullCircle);\n", 117 | "g->SetMarkerColor(kBlue);\n", 118 | "g->SetLineColor(kGreen);" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "source": [ 127 | "Now we set also the grid on the canvas." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "c->SetGrid();\n", 137 | "c->Draw();" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "ROOT C++", 144 | "language": "c++", 145 | "name": "root" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": "text/x-c++src", 149 | "file_extension": ".C", 150 | "mimetype": " text/x-c++src", 151 | "name": "c++" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 2 156 | } 157 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/SimpleHistogram.C: -------------------------------------------------------------------------------- 1 | void SimpleHistogram() 2 | { 3 | // Create a histogram with 50 bins and an x axis ranging from 0 to 10 4 | auto h = new TH1D("myHisto", "Exponential distribution; Observed events;time", 50, 0, 10); 5 | 6 | // Fill it with random numbers distributed according to an Exponential distribution 7 | // Use the global ROOT TRandom instance for generating random numbers 8 | double rate = 0.5; 9 | for (int i = 0; i < 1000; i++) 10 | h->Fill(gRandom->Exp(1./rate)); 11 | 12 | // Change its line width with a thicker one 13 | h->SetLineWidth(3); 14 | 15 | // Draw! 
16 | h->Draw(); 17 | } 18 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/SimpleHistogram.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Create a Simple Histogram - C++ version\n", 8 | "\n", 9 | "Build and draw a histogram with the following features:\n", 10 | "\n", 11 | "- The number of bins is 50 and the x axis ranges from 0 to 10.\n", 12 | "- It is filled with random numbers distributed according to an exponential distribution which has a rate = 0.5.\n", 13 | "- Its line width is thicker than the default one.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "We now create our histogram" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "TH1D h(\"myHisto\", \"Exponential distribution; time; Observed events\", 50, 0, 10);" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "We use the ROOT random number generator, e.g. TRandom3.\n", 37 | "See https://root.cern.ch/doc/master/classTRandom.html for the list of available PRNGs in ROOT and the available functions to generate random numbers according to some specific distributions.\n", 38 | "\n", 39 | "Note that the parameter to pass to `TRandom::Exp` is the exponential decay parameter, the inverse of the exponential rate."
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "TRandom3 r(111);\n", 49 | "double rate = 0.5;\n", 50 | "double tau = 1./rate;\n", 51 | "for (int i = 0; i < 1000; i++)\n", 52 | " h.Fill(r.Exp(tau));" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Time for styling the histogram setting a thicker line and use jsroot" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "%jsroot on\n", 69 | "h.SetLineWidth(3);\n", 70 | "TCanvas c;\n", 71 | "h.Draw();\n", 72 | "c.Draw();" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "source": [ 81 | "We can also fill the histogram with blue color.\n", 82 | "We don't need to re-draw the histogram. It is enough to call `TCanvas::Draw()`" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "h.SetFillColor(kBlue);\n", 92 | "c.Draw();" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "ROOT C++", 99 | "language": "c++", 100 | "name": "root" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": "text/x-c++src", 104 | "file_extension": ".C", 105 | "mimetype": " text/x-c++src", 106 | "name": "c++" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 2 111 | } 112 | -------------------------------------------------------------------------------- /course/exercises/extra/01_Histograms_Graphs_Functions/solutions/graphDraw_Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Interactively Draw a Graph" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | 
"metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import ROOT\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "c = ROOT.TCanvas()" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "The simple graph" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "x = np.array([1,2,3,4,5], dtype=float)\n", 45 | "y = x*x\n", 46 | "g = ROOT.TGraph(n=x.size, x=x, y=y)\n", 47 | "g.Draw(\"APL\")\n", 48 | "c.Draw()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Change marker style, colour as well as line colour and thickness. Make the plot interactive. Re-draw the plot and interact with it!" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "%jsroot on\n", 65 | "g.SetMarkerStyle(ROOT.kFullTriangleUp)\n", 66 | "g.SetMarkerSize(3)\n", 67 | "g.SetMarkerColor(ROOT.kAzure)\n", 68 | "g.SetLineColor(ROOT.kRed - 2)\n", 69 | "g.SetLineWidth(2)\n", 70 | "g.SetLineStyle(3)\n", 71 | "c.Draw()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Now we set the title and the grid on the canvas." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "g.SetTitle(\"My Graph;The X;My Y\")\n", 88 | "c.SetGrid()\n", 89 | "c.Draw()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "We will now add the symbol of the Calcium isotope" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "txt = \"#color[804]{My text #mu {}^{40}_{20}Ca}\"\n", 106 | "l = ROOT.TLatex(.2, 10, txt)\n", 107 | "l.Draw() \n", 108 | "c.Draw()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Redraw using a Y axis in log scale." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "c.SetLogy()\n", 125 | "c.Draw()" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 3", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.11.9" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 2 150 | } 151 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/macros/correlatedParameters.C: -------------------------------------------------------------------------------- 1 | // Demonstrate how a poorly designed model can lead to inconsistent results 2 | 3 | { 4 | // We fill a histogram with exponentially distributed random numbers 5 | TRandom3 rndm(1); 6 | TH1D h("he","Exponential Distribution", 64, 0, 16); 7 | for (auto i : 
ROOT::TSeqI(256)) { //TSeqI generates in integer sequence 8 | h.Fill(rndm.Exp(1)); 9 | } 10 | 11 | // Now we prepare a fit model 12 | TF1 f("expo", "[A] * exp(-([B] + x))"); 13 | f.SetParameter(0, 1); 14 | f.SetParameter(1, 1); 15 | 16 | auto fitRes = h.Fit(&f, "S"); 17 | 18 | fitRes->GetCorrelationMatrix().Print(); 19 | 20 | std::cout << "Question:\n\tWhy is there such a high correlation between the parameters A and B?" << std::endl; 21 | } 22 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/macros/firstFit.C: -------------------------------------------------------------------------------- 1 | // This macro allows us to study the result of a fit 2 | { 3 | 4 | // -------------------------------------------------------------------------- 5 | // Preparation of the histogram to fit 6 | 7 | // Create First an empty histogram with 50 bins with range [-10,10] 8 | TH1D h1("h1", "h1", 50, -10, 10); 9 | 10 | // Fill the histogram with 10000 Gaussian Random numbers with mean=1 and 11 | // sigma=2 12 | // Hint: Find the TRandom3 documentation. 13 | // Hint 2: If you don't find what you are looking for, check also the TRandom functions on TRandom3's 14 | // documentation 15 | TRandom3 randomGenerator(1); 16 | Your code here 17 | 18 | // Let's now draw the histogram 19 | Your code here 20 | 21 | // -------------------------------------------------------------------------- 22 | // Before Fitting we need to create the fitting function and set its initial 23 | // parameter values. 24 | 25 | auto f1 = new TF1("f1", "gaus", -10, 26 | 10); // We use new to have this surviving the scope 27 | f1->SetParameters(100, 0, 1); 28 | 29 | // We fit now the histogram using the Fit method in ROOT. By default the 30 | // least-square method is used. For likelihood fits we need to use the option 31 | // "L". The option "S" is used to create a TFitResult object that is returned 32 | // to the user. 
If we want to compute the error using MINOS, we use the "E" 33 | // option. We also want to change the default Minimization engine. We will use 34 | // Minuit2 35 | 36 | ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); 37 | 38 | auto res = h1.Fit(f1, "L S E"); 39 | 40 | // print the result 41 | Your code here 42 | 43 | // We now get the correlation matrix of the fit from the TFitResult object and print it 44 | Your code here 45 | 46 | 47 | std::cout << "Gaussian sigma = " << f1->GetParameter("Sigma") << " +/- " 48 | << f1->GetParError(f1->GetParNumber("Sigma")) << std::endl; 49 | 50 | // Gaussian sigma = 2.08778 +/- 0.0 51 | } 52 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/macros/fitPanel.C: -------------------------------------------------------------------------------- 1 | { 2 | 3 | TH1F h("myHisto", "Barely filled histogram", 128, -8, 8); 4 | h.FillRandom("gaus", 128); 5 | h.Draw(); 6 | 7 | std::cout << "Start a fit panel by clicking Tools -> FitPanel" << std::endl; 8 | } 9 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/notebooks/CentralLimitTheorem.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise: Central Limit Theorem\n", 8 | "\n", 9 | "In this exercise we will show what the Central Limit Theorem is and how it applies " 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### Part 1: Generate random numbers according to a uniform distribution" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "The first goal is to generate *n* random numbers according to a uniform distribution between [-1,1], fill a histogram and compute the average of the generated numbers.\n", 24 |
"Display also the obtained histogram.\n", 25 | "\n", 26 | "Let's start with n = 10" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "int n = 10; \n", 38 | "TRandom3 r(0); // initialize with zero to have a random seed " 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "source": [ 47 | "Create and book the histogram" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "source": [ 65 | "Generate the numbers and fill the histogram.\n", 66 | "You can compute the average directly or let the histogram computing it for you" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "collapsed": false, 82 | "scrolled": true 83 | }, 84 | "source": [ 85 | "Display the histogram and print out the average result" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Part 2: Study the distribution of the sample mean composed of *n* numbers uniformly distributed\n", 102 | "\n", 103 | "Now repeat many times what has been done before to study the distribution of the average, $\\mu$. The exercise will show that this distribution will converge very quickly to a Gaussian distribution. It is enough to have a very small \n", 104 | "$n$ to get already a pretty good Gaussian. 
\n", 105 | "For having the sigma of the distribution independent of the number of generated events $n$, we will \n", 106 | "make an histogram of $\\sqrt{n} \\times \\mu$." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Do then as following: \n", 114 | "* Make a loop where for each time $n$ uniform numbers are generated and their average $\\mu$ is computed. \n", 115 | "* Make an histogram now of $\\sqrt{n} \\times \\mu$.\n", 116 | "\n", 117 | "Start using a very small $n$ (e.g. $n=3$) but use for the loop, which performs the generation of $n$ numbers, a large value (e.g. $n_{experiments} = 10000$). " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 2, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "hout = new TH1D(\"h\",\"Distribution of average values\",50,-2,2);\n", 129 | "int nexp = 10000; " 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 3, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "n = 2;\n", 141 | "hout->Reset(); // for running this cell a second time\n", 142 | "for (int i = 0; i < nexp; ++i){ \n", 143 | " // generate n uniform numbers, compute average and fill histogram \n", 144 | "}" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "source": [ 153 | "### Part 3: Fit the obtained histogram with a Gaussian function\n", 154 | "\n", 155 | "We now perform a fit with a Gaussian distribution and see how the obtained data agree with the function" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "Repeat the operation above by increasing $n$ to a larger value (e.g. 
$n=10$). For the Central Limit Theorem as $n$ is increased the obtained distribution will converge to a Gaussian. " 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "Question : What is the computed standard deviation of the distribution when we generate $n$ uniform number between [-1,1] ? \n", 179 | "What will be then the $\\sigma$ if I generate the number between [$-\\sqrt{3},\\sqrt{3}$] ?" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [] 186 | } 187 | ], 188 | "metadata": { 189 | "kernelspec": { 190 | "display_name": "ROOT C++", 191 | "language": "c++", 192 | "name": "root" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": "text/x-c++src", 196 | "file_extension": ".C", 197 | "mimetype": " text/x-c++src", 198 | "name": "c++" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 1 203 | } 204 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/notebooks/GausFit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example: Gauss Histogram Fitting \n", 8 | "\n", 9 | "This example shows how to fit an histogram using ROOT " 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Create First an empty histogram with 64 bins with range [-8,8]" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "TH1D h1(\"h1\",\"h1\",64,-8,8);" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Fill the histogram with 1000 Gaussian Random number with mean=1 and sigma=2" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 
41 | "for (int i = 0; i < 1000; ++i) {\n", 42 | " h1.Fill(gRandom->Gaus(1,2));\n", 43 | "}" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "Note, we could also use the function h1.FillRandom(\"gaus\"), but we need to set the right parameters of the Gaussian function before.\n", 51 | "\n", 52 | "We plot then the histogram (using JSROOT)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "%jsroot on\n", 62 | "h1.DrawCopy();\n", 63 | "gPad->Draw();" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Part1: Fit the Histogram with a Gaussian function\n", 71 | "\n", 72 | "Note that before fitting we need to create the fitting function and set its initial parameter values. \n", 73 | "For a Gaussian function the setting of initial values is not really needed, because they can be computed automatically by ROOT. " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "// declare here some variables which will be used afterwards\n", 83 | "TF1 * f1 = nullptr; \n", 84 | "TFitResultPtr res; " 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Create the Gaussian Fit function" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "f1 = new TF1(\"f1\",\"gaus\",-8,8);" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Set the initial parametger values (e.g. 
Constant = 100, mean = 0, sigma =1)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "f1->SetParameters(100,0,1); " 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Fit now the histogram using the Fit method in ROOT. \n", 124 | "- By default the least-square method is used. \n", 125 | "- For likelihood fits we need to use the option `\"L\"`\n", 126 | "- for Pearson chi-square (expected error) , option `\"P\"`.\n", 127 | "\n", 128 | "Use also option `\"S\"` to create a TFitResult object that is returned to the user.\n", 129 | "To compute the error using MINOS, use the `\"E\"` option\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "ROOT::Math::MinimizerOptions::SetDefaultMinimizer(\"Minuit2\");" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "res = h1.Fit(f1, \"S E\");" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "gStyle->SetOptFit(1111);\n", 157 | "gPad->SetLogy(true); // to set log scale in y\n", 158 | "gPad->Draw();" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Print the result of the Fit from the returned TFitResult object" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "res->Print();" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "We can plot the fit residuals using , see https://root.cern.ch/doc/master/classTRatioPlot.html " 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | 
"execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "auto rp1 = new TRatioPlot(&h1);\n", 191 | "rp1->Draw();\n", 192 | "rp1->GetLowerRefYaxis()->SetTitle(\"ratio\");\n", 193 | "rp1->GetUpperRefYaxis()->SetTitle(\"entries\");\n", 194 | "gPad->Draw();" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "We can print also the Chisquare value of the fit. \n", 202 | "- if we have done a Neyman Chi2 fit we print the Chi2 computed using the bin values with observed errors (Neyman Chi2)\n", 203 | "- if we have done a Likelihood fit we compute a Chi2 from the Poisson likelihood (see this [paper](https://cds.cern.ch/record/148303?ln=en) from Baker-Cousins).\n", 204 | "\n", 205 | "Note that in the first case the empty bins are not used, while in the second one all bins are used including empty ones. " 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "//std::cout << \"Neyman chi2 = \" << h1.Chisquare(f1) << \"/ NDF = \" << f1->GetNDF() << std::endl;\n", 215 | "std::cout << \"Poisson likelihood chi2 = \" << h1.Chisquare(f1,\"L\") << \"/ NDF = \" << h1.GetNbinsX()-f1->GetNpar() << std::endl;" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Get the correlation matrix of the fit from the TFitResult class and print it " 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "auto corrMatrix = res->GetCorrelationMatrix(); \n", 232 | "corrMatrix.Print(); " 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "std::cout << \"Gaussian sigma = \" << f1->GetParameter(\"Sigma\") << \" +/- \" << f1->GetParError(f1->GetParNumber(\"Sigma\")) << std::endl;" 242 
| ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "If we want to access the MINOS asymmetric errors, we can get them from the TFitResult object" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "std::cout << \"Gaussian sigma = \" << res->Parameter(2) << \" + \" << res->UpperError(2) << \" \" << res->LowerError(2) << std::endl;" 258 | ] 259 | } 260 | ], 261 | "metadata": { 262 | "kernelspec": { 263 | "display_name": "ROOT C++", 264 | "language": "c++", 265 | "name": "root" 266 | }, 267 | "language_info": { 268 | "codemirror_mode": "text/x-c++src", 269 | "file_extension": ".C", 270 | "mimetype": " text/x-c++src", 271 | "name": "c++" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 1 276 | } 277 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/readme.md: -------------------------------------------------------------------------------- 1 | # Fitting and parameter estimation 2 | Welcome to the exercises of the **Fitting and parameter estimation** learning module! 3 | 4 | Since fitting is a complex topic we provide a number of exercises. We know the time in today's course is limited, so please work at your own pace and attempt as many exercises as you can. You can always come back to them later. The section is split into notebook- and macro-based exercises. 5 | 6 | ## Notebooks 7 | 8 | ### Simple fitting example 9 | 10 | Fill in the following [notebook](notebooks/FittingExample.ipynb). This notebook shows how to fit an histogram in Python (using PyROOT) with a user defined function (implemented in C++ for better CPU efficiency). The solution is available in the `solutions` folder. 
11 | 12 | ### Example of fitting an histogram with a Gaussian function 13 | 14 | The [notebook](notebooks/GausFit.ipynb) shows how to fit an histogram using ROOT with a simple function such as a Gaussian. We show how to use the default chi-square method and the binned likelihood method for fitting histograms. We also show how to plot the fit residual using the TRatioPlot and to compute the chi2 for a goodness of fit test. 15 | 16 | ### Additional examples 17 | 18 | We provide three extra examples for those of you who want to train even more or would like to come back to the fitting module exercises in the future: 19 | - [GausFit_2](notebooks/GausFit_2.ipynb) 20 | - [CentralLimitTheorem](notebooks/CentralLimitTheorem.ipynb) 21 | - [HiggsBinFit](notebooks/HiggsBinFit.ipynb) - which shows how we really work with the data, here looking for the Higgs signal events. 22 | 23 | ## Macros 24 | 25 | ### Your First fit with ROOT 26 | The macro [firstFit.C](macros/firstFit.C) is only half-way complete. In the code a fit of a histogram is performed 27 | and the parameters of the fitted model are inspected. 28 | Try to complete the code putting to good use what you learned during the lectures. 29 | The solution is available in the [firstFit_Solution.C](solutions/firstFit_Solution.C) macro. 30 | 31 | ### Correlation of Parameters 32 | The [correlatedParameters.C](macros/correlatedParameters.C) is a simple macro which fits an exponential to a histogram. It seems that there is a problem. While the fit parameters look ok, the correlation matrix shows something fishy. Can you say what is going wrong and why? 33 | 34 | ### Likelihood and Chi-Square estimators 35 | Run the macro [fitPanel.C](macros/fitPanel.C) with this command 36 | ``` 37 | root -l fitPanel.C 38 | ``` 39 | This leaves a window with a histogram drawn open. The entries are distributed according to a Gaussian but the bins are narrow and the number of entries is low. 
Right click on the histogram line (or with two fingers if you have a mac). Select *FitPanel* to open a fit panel. Now fit the histogram selecting the Chi-Square and Likelihood estimators. 40 | What changes to the parameter values and to their uncertainties? 41 | -------------------------------------------------------------------------------- /course/exercises/extra/02_Fitting/solutions/firstFit_Solution.C: -------------------------------------------------------------------------------- 1 | // This macro allows us to study the result of a fit 2 | { 3 | 4 | // -------------------------------------------------------------------------- 5 | // Preparation of the histogram to fit 6 | 7 | // Create First an empty histogram with 50 bins with range [-10,10] 8 | TH1D h1("h1", "h1", 50, -10, 10); 9 | 10 | // Fill the histogram with 1000 Gaussian Random number with mean=1 and 11 | // sigma=2 12 | // Hint: Find the TRandom3 documentation. 13 | // Hint 2: If you don't find what you are looking for, check also the TRandom functions on TRandom3's 14 | // documentation 15 | TRandom3 randomGenerator(1); 16 | for (int i = 0; i < 1000; ++i) { 17 | h1.Fill(randomGenerator.Gaus(1, 2)); 18 | } 19 | 20 | // Let's now draw the histogram 21 | h1.Draw(); 22 | 23 | // -------------------------------------------------------------------------- 24 | // Before Fitting we need to create the fitting function and set its initial 25 | // parameter values. 26 | 27 | auto f1 = new TF1("f1", "gaus", -10, 28 | 10); // We use new to have this surviving the scope 29 | f1->SetParameters(100, 0, 1); 30 | 31 | // We fit now the histogram using the Fit method in ROOT. By default the 32 | // least-square method is used. For likelihood fits we need to use the option 33 | // "L". The option "S" is used to create a TFitResult object that is returned 34 | // to the user. If we want to compute the error using MINOS, we use the "E" 35 | // option. We want to change also the default Minimization engine. 
We will use 36 | // Minuit2 37 | 38 | ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); 39 | 40 | auto res = h1.Fit(f1, "L S E"); 41 | 42 | res->Print(); 43 | 44 | // **************************************** 45 | // Minimizer is Minuit2 / Migrad 46 | // Chi2 = 35.8313 47 | // NDf = 29 48 | // Edm = 1.53927e-06 49 | // NCalls = 61 50 | // Constant = 74.0249 +/- 3.02719 51 | // Mean = 1.02272 +/- 0.0685141 52 | // Sigma = 2.08778 +/- 0.0545989 (limited) 53 | 54 | // We now get the correlation matrix of the fit from the TFitResult class 55 | 56 | auto corrMatrix = res->GetCorrelationMatrix(); 57 | 58 | corrMatrix.Print(); 59 | 60 | // 3x3 matrix is as follows 61 | 62 | // | 0 | 1 | 2 | 63 | // -------------------------------------------- 64 | // 0 | 1 0.007831 -0.6162 65 | // 1 | 0.007831 1 -0.02075 66 | // 2 | -0.6162 -0.02075 1 67 | 68 | std::cout << "Gaussian sigma = " << f1->GetParameter("Sigma") << " +/- " 69 | << f1->GetParError(f1->GetParNumber("Sigma")) << std::endl; 70 | 71 | // Gaussian sigma = 2.08778 +/- 0.0545989 72 | } 73 | -------------------------------------------------------------------------------- /course/exercises/extra/03_Working_With_Files/WritingOnFilesExercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Writing on files\n", 8 | "This is a Python notebook in which you will practice the concepts learned during the lectures.\n", 9 | "\n", 10 | "## Startup ROOT\n", 11 | "Import the ROOT module: this will activate the integration layer with the notebook automatically" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import ROOT" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Writing histograms\n", 28 | "Create a **TFile** containing three histograms filled 
with random numbers distributed according to a Gaus, an exponential and a uniform distribution.\n", 29 | "Close the file: you will reopen it later." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "rndm = ROOT.TRandom3(1)\n", 39 | "\n", 40 | "filename = \"histos.root\"\n", 41 | "\n", 42 | "# Here open a file and create three histograms\n", 43 | "\n", 44 | "for i in range(1024):\n", 45 | " # Use the following lines to feed the Fill method of the histograms in order to fill\n", 46 | " rndm.Gaus()\n", 47 | " rndm.Exp(1)\n", 48 | " rndm.Uniform(-4,4)\n", 49 | "\n", 50 | "# Here write the three histograms on the file and close the file \n", 51 | " " 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Now, you can invoke the *ls* command from within the notebook to list the files in this directory. Check that the file is there. You can invoke the *rootls* command to see what's inside the file. " 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "! ls .\n", 68 | "! echo Now listing the content of the file\n", 69 | "! rootls -l #filename here" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "Access the histograms and draw them in Python. Remember that you need to create a TCanvas before and draw it too in order to inline the plots in the notebooks.\n", 77 | "You can switch to the interactive JavaScript visualisation using the *%jsroot on* \"magic\" command." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "%jsroot on\n", 87 | "f = ROOT.TFile(filename)\n", 88 | "c = ROOT.TCanvas()\n", 89 | "c.Divide(2,2)\n", 90 | "c.cd(1)\n", 91 | "f.gaus.Draw()\n", 92 | "# finish the drawing in each pad\n", 93 | "# Draw the Canvas" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "You can now repeat the exercise above using **C++**. Transform the cell in a **C++** cell using the *%%cpp* \"magic\"." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "%%cpp\n", 110 | "TFile f(\"histos.root\");\n", 111 | "TH1F *hg, *he, *hu;\n", 112 | "hg = f.Get(\"gaus\");\n", 113 | "// ... read the histograms and draw them in each pad" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Inspect the content of the file: TXMLFile\n", 121 | "ROOT provides a different kind of **TFile**, **TXMLFile**. It has the same interface and it's very useful to better understand how objects are written in files by ROOT.\n", 122 | "Repeat the exercise above, either on Python or C++ - your choice, using a **TXMLFILE** rather than a **TFile** and then display its content with the *cat* command. Can you see how the content of the individual bins of the histograms is stored? And the colour of its markers?\n", 123 | "Do you understand why the xml file is bigger than the root one even if they have the same content?" 
124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "f = ROOT.TXMLFile(\"histos.xml\",\"RECREATE\")\n", 133 | "\n", 134 | "hg = ROOT.TH1F(\"gaus\",\"Gaussian numbers\", 64, -4, 4)\n", 135 | "he = ROOT.TH1F(\"expo\",\"Exponential numbers\", 64, -4, 4)\n", 136 | "hu = ROOT.TH1F(\"unif\",\"Uniform numbers\", 64, -4, 4)\n", 137 | "for i in range(1024):\n", 138 | " hg.Fill(rndm.Gaus())\n", 139 | " # ... Same as above!" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "! ls -l histos.xml histos.root" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "! cat histos.xml" 158 | ] 159 | } 160 | ], 161 | "metadata": { 162 | "kernelspec": { 163 | "display_name": "Python 2", 164 | "language": "python", 165 | "name": "python2" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 2 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | "nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython2", 177 | "version": "2.7.15rc1" 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } 183 | -------------------------------------------------------------------------------- /course/exercises/extra/03_Working_With_Files/histos.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/exercises/extra/03_Working_With_Files/histos.root -------------------------------------------------------------------------------- /course/exercises/extra/03_Working_With_Files/readme.md: 
-------------------------------------------------------------------------------- 1 | # Working With Files Learning Module 2 | Welcome to the exercises of the **Working with Files** Learning Module! 3 | 4 | ## Writing and Reading histograms 5 | This exercise is entirely based on notebooks. The instructions are embedded in the [notebook](WritingOnFilesExercise.ipynb) and your code soon will be as well! The solution is provided in the `solutions` folder. 6 | 7 | ## TBrowser 8 | If you're also using ROOT outside of the notebooks (e.g. you have ROOT installed on your machine or you're using lxplus), TBrowser is a very useful tool that allows you to interactively inspect the contents of the ROOT files. 9 | 10 | In the terminal open the `histos.root` file with *root* 11 | ```root histos.root``` 12 | And open a **TBrowser** 13 | ```root[0] TBrowser b``` 14 | Now display the histograms: do they have the same content you saw in the notebook? 15 | -------------------------------------------------------------------------------- /course/exercises/extra/04_RDataFrame/rdataframe-dimuon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "## ROOT dataframe tutorial: Dimuon spectrum\n", 12 | "\n", 13 | "This tutorial shows you how to analyze datasets using RDataFrame from a Python notebook. The example analysis performs the following steps:\n", 14 | "\n", 15 | "* Connect a ROOT dataframe to a dataset containing 61 mio. 
events recorded by CMS in 2012\n", 16 | "* Filter the events being relevant for your analysis\n", 17 | "* Compute the invariant mass of the selected dimuon candidates\n", 18 | "* Plot the invariant mass spectrum showing resonances up to the Z mass\n", 19 | "\n", 20 | "This material is based on the analysis done by Stefan Wunsch, available [here](http://opendata.web.cern.ch/record/12342) in CERN's Open Data portal.\n", 21 | "\n", 22 | "
" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "fragment" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import ROOT " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "## Create a ROOT dataframe in Python\n", 47 | "First we will create a ROOT dataframe that is connected to a dataset named `Events` stored in a ROOT file. The file is pulled in via [XRootD](http://xrootd.org/) from EOS public, but note how it could also be stored in your CERNBox space or in any other EOS repository accessible from SWAN (e.g. the experiment ones).\n", 48 | "\n", 49 | "The dataset Events is a TTree and has the following branches:\n", 50 | "\n", 51 | "| Branch name | Data type | Description |\n", 52 | "|-------------|-----------|-------------|\n", 53 | "| `nMuon` | `unsigned int` | Number of muons in this event |\n", 54 | "| `Muon_pt` | `float[nMuon]` | Transverse momentum of the muons stored as an array of size `nMuon` |\n", 55 | "| `Muon_eta` | `float[nMuon]` | Pseudo-rapidity of the muons stored as an array of size `nMuon` |\n", 56 | "| `Muon_phi` | `float[nMuon]` | Azimuth of the muons stored as an array of size `nMuon` |\n", 57 | "| `Muon_charge` | `int[nMuon]` | Charge of the muons stored as an array of size `nMuon` and either -1 or 1 |\n", 58 | "| `Muon_mass` | `float[nMuon]` | Mass of the muons stored as an array of size `nMuon` |" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "subslide" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "treename = \"Events\"\n", 72 | "filename = \"root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/Run2012BC_DoubleMuParked_Muons.root\"\n", 73 | "df = ROOT.RDataFrame(treename, filename)" 74 | ] 75 | }, 76 | { 77 | 
"cell_type": "markdown", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "slide" 81 | } 82 | }, 83 | "source": [ 84 | "## Run only on a part of the dataset\n", 85 | "\n", 86 | "The full dataset contains half a year of CMS data taking in 2012 with 61 mio events. For the purpose of this example, we use the [Range](https://root.cern/doc/master/classROOT_1_1RDF_1_1RInterface.html#a1b36b7868831de2375e061bb06cfc225) node to run only on a small part of the dataset. This feature also comes in handy in the development phase of your analysis.\n", 87 | "\n", 88 | "Feel free to experiment with this parameter!" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# Take only the first 1M events\n", 98 | "df_range = # do something here" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "slide" 106 | } 107 | }, 108 | "source": [ 109 | "## Filter relevant events for this analysis\n", 110 | "\n", 111 | "Physics datasets are often general purpose datasets and therefore need extensive filtering of the events for the actual analysis. Here, we implement only a simple selection based on the number of muons and the charge to cut down the dataset in events that are relevant for our study.\n", 112 | "\n", 113 | "In particular, we are applying two filters to keep:\n", 114 | "1. Events with exactly two muons\n", 115 | "2. 
Events with muons of opposite charge" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "subslide" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "# Change the first strings of both following operations to proper C++ expressions\n", 129 | "# Use the points 1, 2 above as hints for what to write in your expression\n", 130 | "df_2mu = df_range.Filter(\"DO SOMETHING WITH COLUMN nMuon\", \"Events with exactly two muons\")\n", 131 | "df_oc = df_2mu.Filter(\"DO SOMETHING WITH COLUMN Muon_charge\", \"Muons with opposite charge\")" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": { 137 | "slideshow": { 138 | "slide_type": "slide" 139 | } 140 | }, 141 | "source": [ 142 | "## Perform complex operations in Python, efficiently!\n", 143 | "\n", 144 | "Since we still want to perform complex operations in Python but plain Python code is prone to be slow and not thread-safe, you should use as much as possible C++ functions to do the work in your event loop during runtime. This mechanism uses the C++ interpreter `cling ` shipped with ROOT, making this possible in a single line of code.\n", 145 | "\n", 146 | "Note, that we are using here the `Define` node of the computation graph with a jitted function, calling into a function available in the ROOT library." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "slideshow": { 154 | "slide_type": "subslide" 155 | } 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "df_mass = df_oc.Define(\"Dimuon_mass\", \"ROOT::VecOps::InvariantMass(Muon_pt, Muon_eta, Muon_phi, Muon_mass)\")" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "slideshow": { 166 | "slide_type": "slide" 167 | } 168 | }, 169 | "source": [ 170 | "## Make a histogram of the newly created column" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "slideshow": { 178 | "slide_type": "fragment" 179 | } 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "# These are the parameters you would give to a histogram object constructor\n", 184 | "# Put them in the right order inside the parentheses below\n", 185 | "# You are effectively passing a tuple to the `Histo1D` operation as seen previously in other notebooks\n", 186 | "nbins = 30000\n", 187 | "low = 0.25\n", 188 | "up = 300\n", 189 | "histo_name = \"Dimuon_mass\"\n", 190 | "histo_title = histo_name\n", 191 | "\n", 192 | "h = df_mass.Histo1D((\"PUT HISTOGRAM PARAMETERS HERE IN THE CORRECT ORDER\"), \"Dimuon_mass\")" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "slideshow": { 199 | "slide_type": "slide" 200 | } 201 | }, 202 | "source": [ 203 | "## Book a Report of the dataframe filters" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "slideshow": { 211 | "slide_type": "fragment" 212 | } 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "report = # your code here" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": { 222 | "slideshow": { 223 | "slide_type": "slide" 224 | } 225 | }, 226 | "source": [ 227 | "## Start data processing\n", 228 | "This is the final step of the analysis: retrieving 
the result. We are expecting to see a plot of the mass of the dimuon spectrum similar to the one shown at the beginning of this exercise (remember we are running on fewer entries in this exercise). Finally in the last cell we should see a report of the filters applied on the dataset." 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "slideshow": { 236 | "slide_type": "fragment" 237 | } 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "%%time\n", 242 | "\n", 243 | "ROOT.gStyle.SetOptStat(0)\n", 244 | "ROOT.gStyle.SetTextFont(42)\n", 245 | "c = ROOT.TCanvas(\"c\", \"\", 800, 700)\n", 246 | "c.SetLogx()\n", 247 | "c.SetLogy()\n", 248 | "h.SetTitle(\"\")\n", 249 | "h.GetXaxis().SetTitle(\"m_{#mu#mu} (GeV)\")\n", 250 | "h.GetXaxis().SetTitleSize(0.04)\n", 251 | "h.GetYaxis().SetTitle(\"N_{Events}\")\n", 252 | "h.GetYaxis().SetTitleSize(0.04)\n", 253 | "h.Draw()\n", 254 | "\n", 255 | "label = ROOT.TLatex()\n", 256 | "label.SetNDC(True)\n", 257 | "label.SetTextSize(0.040)\n", 258 | "label.DrawLatex(0.100, 0.920, \"#bf{CMS Open Data}\")\n", 259 | "label.SetTextSize(0.030)\n", 260 | "label.DrawLatex(0.500, 0.920, \"#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}\")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "slideshow": { 268 | "slide_type": "slide" 269 | } 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "%jsroot on\n", 274 | "c.Draw()" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "slideshow": { 282 | "slide_type": "slide" 283 | } 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "report.Print()" 288 | ] 289 | } 290 | ], 291 | "metadata": { 292 | "celltoolbar": "Slideshow", 293 | "kernelspec": { 294 | "display_name": "Python 3", 295 | "language": "python", 296 | "name": "python3" 297 | }, 298 | "language_info": { 299 | "codemirror_mode": { 300 | "name": "ipython", 301 | 
"version": 3 302 | }, 303 | "file_extension": ".py", 304 | "mimetype": "text/x-python", 305 | "name": "python", 306 | "nbconvert_exporter": "python", 307 | "pygments_lexer": "ipython3", 308 | "version": "3.9.6" 309 | } 310 | }, 311 | "nbformat": 4, 312 | "nbformat_minor": 2 313 | } 314 | -------------------------------------------------------------------------------- /course/exercises/extra/04_RDataFrame/readme.md: -------------------------------------------------------------------------------- 1 | # RDataFrame 2 | Welcome to the additional exercises of the **RDataFrame** learning module! What you can find 3 | here is an example of a HEP analysis that uses the `RDataFrame` tool. Open the notebook 4 | and follow the instructions to fill it in. In the `solutions` folder you can find 5 | the solutions in a notebook format, as well as a Python macro and a fully compiled 6 | C++ code - so you can see and compare what the same analysis code looks like 7 | in C++. Can you execute `rdataframe-dimuon.cpp` as well? 
8 | 9 | For more RDataFrame tutorials, visit: https://root.cern/doc/master/group__tutorial__dataframe.html -------------------------------------------------------------------------------- /course/exercises/extra/04_RDataFrame/solutions/rdataframe-dimuon.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Adaptation of the tutorial at https://root.cern/doc/master/df102__NanoAODDimuonAnalysis_8py.html 3 | */ 4 | #include 5 | #include 6 | #include 7 | 8 | int main() 9 | { 10 | // Create dataframe from NanoAOD files 11 | ROOT::RDataFrame df_full{"Events", "root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/" 12 | "Run2012BC_DoubleMuParked_Muons.root"}; 13 | 14 | // Optional: to see the results faster, restrict the tutorial to the first one million entries of the dataset 15 | auto df = df_full.Range(1000000); 16 | 17 | // Select events with exactly two muons 18 | auto df_2mu = df.Filter([](unsigned int nMuon) { return nMuon == 2; }, {"nMuon"}); 19 | 20 | // Select events with two muons of opposite charge 21 | auto df_os = df_2mu.Filter([](const ROOT::RVecI &muonCharge) { return muonCharge[0] != muonCharge[1]; }, 22 | {"Muon_charge"}); 23 | 24 | // Compute invariant mass of the dimuon system 25 | auto df_mass = 26 | df_os.Define("Dimuon_mass", ROOT::VecOps::InvariantMass, {"Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"}); 27 | 28 | // Book histogram of dimuon mass spectrum 29 | const auto bins = 30000; 30 | const auto low = 0.25; 31 | const auto up = 300.0; 32 | auto hist = 33 | df_mass.Histo1D({"Dimuon_mass", "Dimuon mass;m_{#mu#mu} (GeV);N_{Events}", bins, low, up}, "Dimuon_mass"); 34 | 35 | // Produce the plot 36 | TCanvas c; 37 | c.SetLogx(); 38 | c.SetLogy(); 39 | 40 | hist->SetTitle(""); 41 | hist->GetXaxis()->SetTitleSize(0.04); 42 | hist->GetYaxis()->SetTitleSize(0.04); 43 | hist->Draw(); 44 | 45 | c.SaveAs("dimuon_spectrum.png"); 46 | } 47 | 
-------------------------------------------------------------------------------- /course/exercises/extra/04_RDataFrame/solutions/rdataframe-dimuon.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptation of the tutorial at https://root.cern/doc/master/df102__NanoAODDimuonAnalysis_8py.html 3 | """ 4 | import ROOT 5 | 6 | # Create dataframe from NanoAOD files 7 | dataset_name = "Events" 8 | file_path = "root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/Run2012BC_DoubleMuParked_Muons.root" 9 | df = ROOT.RDataFrame(dataset_name, file_path) 10 | 11 | # Optional: to see the results faster, restrict the tutorial to the first one million entries of the dataset 12 | df = df.Range(1_000_000) 13 | 14 | # For simplicity, select only events with exactly two muons and require opposite charge 15 | df_2mu = df.Filter("nMuon == 2") 16 | df_os = df_2mu.Filter("Muon_charge[0] != Muon_charge[1]") 17 | 18 | # Compute invariant mass of the dimuon system 19 | df_mass = df_os.Define("Dimuon_mass", "InvariantMass(Muon_pt, Muon_eta, Muon_phi, Muon_mass)") 20 | 21 | # Make histogram of dimuon mass spectrum. Note how we can set titles and axis labels in one go. 
22 | bins = 30_000 23 | xlow = 0.25 24 | xup = 300 25 | h = df_mass.Histo1D(("Dimuon_mass", "Dimuon mass;m_{#mu#mu} (GeV);N_{Events}", bins, xlow, xup), "Dimuon_mass") 26 | 27 | # Produce the plot 28 | c = ROOT.TCanvas("c", "", 800, 700) 29 | c.SetLogx() 30 | c.SetLogy() 31 | 32 | h.SetTitle("") 33 | h.GetXaxis().SetTitleSize(0.04) 34 | h.GetYaxis().SetTitleSize(0.04) 35 | h.Draw() 36 | 37 | c.SaveAs("dimuon_spectrum.png") 38 | -------------------------------------------------------------------------------- /course/exercises/extra/04_RDataFrame/solutions/solution-rdataframe-dimuon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "## ROOT dataframe tutorial: Dimuon spectrum\n", 12 | "\n", 13 | "This tutorial shows you how to analyze datasets using RDataFrame from a Python notebook. The example analysis performs the following steps:\n", 14 | "\n", 15 | "* Connect a ROOT dataframe to a dataset containing 61 mio. events recorded by CMS in 2012\n", 16 | "* Filter the events being relevant for your analysis\n", 17 | "* Compute the invariant mass of the selected dimuon candidates\n", 18 | "* Plot the invariant mass spectrum showing resonances up to the Z mass\n", 19 | "\n", 20 | "This material is based on the analysis done by Stefan Wunsch, available [here](http://opendata.web.cern.ch/record/12342) in CERN's Open Data portal.\n", 21 | "\n", 22 | "
" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "fragment" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import ROOT " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "## Create a ROOT dataframe in Python\n", 47 | "First we will create a ROOT dataframe that is connected to a dataset named `Events` stored in a ROOT file. The file is pulled in via [XRootD](http://xrootd.org/) from EOS public, but note how it could also be stored in your CERNBox space or in any other EOS repository accessible from SWAN (e.g. the experiment ones).\n", 48 | "\n", 49 | "The dataset Events is a TTree and has the following branches:\n", 50 | "\n", 51 | "| Branch name | Data type | Description |\n", 52 | "|-------------|-----------|-------------|\n", 53 | "| `nMuon` | `unsigned int` | Number of muons in this event |\n", 54 | "| `Muon_pt` | `float[nMuon]` | Transverse momentum of the muons stored as an array of size `nMuon` |\n", 55 | "| `Muon_eta` | `float[nMuon]` | Pseudo-rapidity of the muons stored as an array of size `nMuon` |\n", 56 | "| `Muon_phi` | `float[nMuon]` | Azimuth of the muons stored as an array of size `nMuon` |\n", 57 | "| `Muon_charge` | `int[nMuon]` | Charge of the muons stored as an array of size `nMuon` and either -1 or 1 |\n", 58 | "| `Muon_mass` | `float[nMuon]` | Mass of the muons stored as an array of size `nMuon` |" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "subslide" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "treename = \"Events\"\n", 72 | "filename = \"root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/Run2012BC_DoubleMuParked_Muons.root\"\n", 73 | "df = ROOT.RDataFrame(treename, filename)" 74 | ] 75 | }, 76 | { 77 | 
"cell_type": "markdown", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "slide" 81 | } 82 | }, 83 | "source": [ 84 | "## Run only on a part of the dataset\n", 85 | "\n", 86 | "The full dataset contains half a year of CMS data taking in 2012 with 61 mio events. For the purpose of this example, we use the [Range](https://root.cern/doc/master/classROOT_1_1RDF_1_1RInterface.html#a1b36b7868831de2375e061bb06cfc225) node to run only on a small part of the dataset. This feature also comes in handy in the development phase of your analysis.\n", 87 | "\n", 88 | "Feel free to experiment with this parameter!" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# Take only the first 1M events\n", 98 | "df_range = df.Range(1000000)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "slide" 106 | } 107 | }, 108 | "source": [ 109 | "## Filter relevant events for this analysis\n", 110 | "\n", 111 | "Physics datasets are often general purpose datasets and therefore need extensive filtering of the events for the actual analysis. Here, we implement only a simple selection based on the number of muons and the charge to cut down the dataset in events that are relevant for our study.\n", 112 | "\n", 113 | "In particular, we are applying two filters to keep:\n", 114 | "1. Events with exactly two muons\n", 115 | "2. 
Events with muons of opposite charge" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "subslide" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "df_2mu = df_range.Filter(\"nMuon == 2\", \"Events with exactly two muons\")\n", 129 | "df_oc = df_2mu.Filter(\"Muon_charge[0] != Muon_charge[1]\", \"Muons with opposite charge\")" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "slide" 137 | } 138 | }, 139 | "source": [ 140 | "## Perform complex operations in Python, efficiently!\n", 141 | "\n", 142 | "Since we still want to perform complex operations in Python but plain Python code is prone to be slow and not thread-safe, you should use as much as possible C++ functions to do the work in your event loop during runtime. This mechanism uses the C++ interpreter `cling ` shipped with ROOT, making this possible in a single line of code.\n", 143 | "\n", 144 | "Note, that we are using here the `Define` node of the computation graph with a jitted function, calling into a function available in the ROOT library." 
145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "slideshow": { 152 | "slide_type": "subslide" 153 | } 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "df_mass = df_oc.Define(\"Dimuon_mass\", \"ROOT::VecOps::InvariantMass(Muon_pt, Muon_eta, Muon_phi, Muon_mass)\")" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": { 163 | "slideshow": { 164 | "slide_type": "slide" 165 | } 166 | }, 167 | "source": [ 168 | "## Make a histogram of the newly created column" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "slideshow": { 176 | "slide_type": "fragment" 177 | } 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "nbins = 30000\n", 182 | "low = 0.25\n", 183 | "up = 300\n", 184 | "histo_name = \"Dimuon_mass\"\n", 185 | "histo_title = histo_name\n", 186 | "\n", 187 | "h = df_mass.Histo1D((histo_name, histo_title, nbins, low, up), \"Dimuon_mass\")" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "slideshow": { 194 | "slide_type": "slide" 195 | } 196 | }, 197 | "source": [ 198 | "## Book a Report of the dataframe filters" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "slideshow": { 206 | "slide_type": "fragment" 207 | } 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "report = df.Report()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": { 217 | "slideshow": { 218 | "slide_type": "slide" 219 | } 220 | }, 221 | "source": [ 222 | "## Start data processing\n", 223 | "This is the final step of the analysis: retrieving the result. We are expecting to see a plot of the mass of the dimuon spectrum similar to the one shown at the beginning of this exercise (remember we are running on fewer entries in this exercise). Finally in the last cell we should see a report of the filters applied on the dataset." 
224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "slideshow": { 231 | "slide_type": "fragment" 232 | } 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "%%time\n", 237 | "\n", 238 | "ROOT.gStyle.SetOptStat(0)\n", 239 | "ROOT.gStyle.SetTextFont(42)\n", 240 | "c = ROOT.TCanvas(\"c\", \"\", 800, 700)\n", 241 | "c.SetLogx()\n", 242 | "c.SetLogy()\n", 243 | "h.SetTitle(\"\")\n", 244 | "h.GetXaxis().SetTitle(\"m_{#mu#mu} (GeV)\")\n", 245 | "h.GetXaxis().SetTitleSize(0.04)\n", 246 | "h.GetYaxis().SetTitle(\"N_{Events}\")\n", 247 | "h.GetYaxis().SetTitleSize(0.04)\n", 248 | "h.Draw()\n", 249 | "\n", 250 | "label = ROOT.TLatex()\n", 251 | "label.SetNDC(True)\n", 252 | "label.SetTextSize(0.040)\n", 253 | "label.DrawLatex(0.100, 0.920, \"#bf{CMS Open Data}\")\n", 254 | "label.SetTextSize(0.030)\n", 255 | "label.DrawLatex(0.500, 0.920, \"#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}\")" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "slideshow": { 263 | "slide_type": "slide" 264 | } 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "%jsroot on\n", 269 | "c.Draw()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "slideshow": { 277 | "slide_type": "slide" 278 | } 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "report.Print()" 283 | ] 284 | } 285 | ], 286 | "metadata": { 287 | "celltoolbar": "Slideshow", 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.9.6" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | 
-------------------------------------------------------------------------------- /course/exercises/extra/05_Graphics/readme.md: -------------------------------------------------------------------------------- 1 | # Graphics Exercises 2 | 3 | Welcome to the exercises of the **Graphics** learning module! 4 | 5 | ## The "Good Plot" Example 6 | In this exercise, you will follow the steps of the "Good Plot" example. In particular, these are the steps to follow: 7 | - Create the initial data set by placing the code below in a macro called *GoodPlot.C*: 8 | ```c++ 9 | // The number of points in the data set 10 | const int n_points = 10; 11 | // The values along X and Y axis 12 | double x_vals[n_points] = {1,2,3,4,5,6,7,8,9,10}; 13 | double y_vals[n_points] = {6,12,14,20,22,24,35,45,44,53}; 14 | // The errors on the Y axis 15 | double y_errs[n_points] = {5,5,4.7,4.5,4.2,5.1,2.9,4.1,4.8,5.43}; 16 | ``` 17 | - Create a `TGraphErrors` instance using the aforementioned data. 18 | - Draw the graph data in an error bars plot. Play with the following draw options: *"APE"*, *"APEL"*, *"APEC"*, *"APE4"*. 19 | - Customize the visual aspect of the plot (marker style, marker color, line color). 20 | - Add a function to compare the data with a line. Use the formula *"[0]+x\*[1]"*. Set the parameters to -1 and 5. Play a little with the line style and color and draw the function in the same canvas as the graph. 21 | - Set the titles for the graph (*"Measurement XYZ;length [cm];Arb.Units"*) and for the axes (*"length [cm]"* and *"Arb.Units"* for x and y). Try to add special LaTeX characters. 22 | - Try to change the number of divisions of the x axis: set the number of primary divisions to 20 and check the result. Did you get that number of divisions? 23 | - Change the label of the x axis corresponding to the maximum deviation by setting its color to red. 24 | - Add a legend to the plot, with one entry for the graph and one for the function. 
25 | - Add an arrow (`TArrow`) and some text (`TLatex`) to the canvas. 26 | 27 | You can find the solution [here](solutions/GoodPlot.C). 28 | -------------------------------------------------------------------------------- /course/exercises/extra/05_Graphics/solutions/GoodPlot.C: -------------------------------------------------------------------------------- 1 | // First example presented in "The ROOT graphics functionalities" 2 | 3 | void macro1NoFit() { 4 | // The values and the errors on the Y axis 5 | const int n_points=10; 6 | double x_vals[n_points] = {1,2,3,4,5,6,7,8,9,10}; 7 | double y_vals[n_points] = {6,12,14,20,22,24,35,45,44,53}; 8 | double y_errs[n_points] = {5,5,4.7,4.5,4.2,5.1,2.9,4.1,4.8,5.43}; 9 | 10 | // Instance of the graph 11 | auto graph = new TGraphErrors(n_points,x_vals,y_vals,nullptr,y_errs); 12 | graph->SetTitle("Measurement XYZ;length [cm];Arb.Units"); 13 | 14 | // Make the plot esthetically better 15 | graph->SetMarkerStyle(kOpenCircle); 16 | graph->SetMarkerColor(kBlue); 17 | graph->SetLineColor(kBlue); 18 | 19 | // The canvas on which we'll draw the graph 20 | auto mycanvas = new TCanvas(); 21 | 22 | // Draw the graph ! 23 | graph->Draw("APE"); 24 | 25 | // Define a linear function 26 | auto f = new TF1("Linear law","-1+x*5",.5,10.5); 27 | f->SetLineColor(kRed); 28 | f->SetLineStyle(2); 29 | f->Draw("Same"); 30 | 31 | // Build and Draw a legend 32 | auto legend = new TLegend(.1,.7,.3,.9,"Lab. Lesson 1"); 33 | legend->AddEntry(graph,"Exp. Points","PE"); 34 | legend->AddEntry(f,"Th. 
Law", "L"); 35 | legend->Draw(); 36 | 37 | // Draw an arrow on the canvas 38 | auto arrow = new TArrow(8,8,6.2,23,0.02,"|>"); 39 | arrow->SetLineWidth(2); 40 | arrow->Draw(); 41 | 42 | // Add some text to the plot and highlight the 3rd label 43 | auto text = new TLatex(8.2,7.5,"#splitline{Maximum}{Deviation}"); 44 | text->Draw(); 45 | graph->GetXaxis()->ChangeLabel(3,-1,-1,-1,kRed); 46 | } 47 | -------------------------------------------------------------------------------- /course/images/DistRDF_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/DistRDF_architecture.png -------------------------------------------------------------------------------- /course/images/binder1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/binder1.png -------------------------------------------------------------------------------- /course/images/cern-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/cern-logo.png -------------------------------------------------------------------------------- /course/images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/dataset.png -------------------------------------------------------------------------------- /course/images/dimuonSpectrum.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/dimuonSpectrum.png -------------------------------------------------------------------------------- /course/images/examplehist_df106_HiggsToFourLeptons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/examplehist_df106_HiggsToFourLeptons.png -------------------------------------------------------------------------------- /course/images/examplehisto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/examplehisto.png -------------------------------------------------------------------------------- /course/images/rdf_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/rdf_1.png -------------------------------------------------------------------------------- /course/images/root1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/root1.png -------------------------------------------------------------------------------- /course/images/swan1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/swan1.png -------------------------------------------------------------------------------- /course/images/swan2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/swan2.png -------------------------------------------------------------------------------- /course/images/swan3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/swan3.png -------------------------------------------------------------------------------- /course/images/swan4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/swan4.png -------------------------------------------------------------------------------- /course/images/swan5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/swan5.png -------------------------------------------------------------------------------- /course/images/tfile1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/tfile1.png -------------------------------------------------------------------------------- /course/images/tfile2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/root-project/student-course/3c1ed0e07c063079579237495db413a927ac1e4d/course/images/tfile2.png -------------------------------------------------------------------------------- /course/notebooks/core/00-root-intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | 
"slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# What is ROOT?" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "- ROOT is a software framework that can be used for:\n", 23 | " - Data processing\n", 24 | " - Data analysis\n", 25 | " - Data visualisation\n", 26 | " - Data storage\n", 27 | " \n", 28 | "\n", 29 | "- ROOT is written mainly in **C++**, with powerful **Python** bindings\n", 30 | "\n", 31 | "- Adopted in High Energy Physics and other sciences (but also industry)\n", 32 | " - **1 EB** of data in ROOT format\n", 33 | " - Fits and parameters’ estimations for discoveries (e.g. the Higgs)\n", 34 | " - Thousands of ROOT plots in scientific publications" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "slide" 42 | } 43 | }, 44 | "source": [ 45 | "## ROOT's building blocks\n", 46 | "\n", 47 | "ROOT can be seen as a collection of building blocks for various activities, such as:\n", 48 | "- **Data analysis: histograms, graphs, functions**\n", 49 | "- **High-level analysis interfaces**: RDataFrame\n", 50 | "- **Statistical tools** (RooFit/RooStats): rich modeling and statistical inference\n", 51 | "- **I/O**: storage of any C++ object, **column-wise** storage of datasets\n", 52 | "- **Math**: non-trivial functions (e.g. Erf, Bessel), optimised math functions\n", 53 | "- **C++ interpretation**: full language compliance\n", 54 | "- **Multivariate analysis** (TMVA): e.g. Boosted decision trees, Neural Nets\n", 55 | "- **Advanced graphics** (2D, 3D, event display)\n", 56 | "- And more: HTTP servering, JavaScript visualisation" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "slideshow": { 63 | "slide_type": "slide" 64 | } 65 | }, 66 | "source": [ 67 | "
" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "slideshow": { 74 | "slide_type": "slide" 75 | } 76 | }, 77 | "source": [ 78 | "# Today's goals\n", 79 | "\n", 80 | "- be able to conduct simple data analysis using RDataFrame, understand how to run the analysis efficiently\n", 81 | "- get familiar with basic ROOT features: histograms, graphs, functions\n", 82 | "- get to know what ROOT file format is\n", 83 | "- don't be afraid of reaching out, asking questions and contributing" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "slideshow": { 90 | "slide_type": "slide" 91 | } 92 | }, 93 | "source": [ 94 | "## Useful links\n", 95 | "\n", 96 | "- ROOT users forum (for questions, support, discussion): https://root-forum.cern.ch\n", 97 | "\n", 98 | "- ROOT guide for beginners: https://root.cern/primer/\n", 99 | "\n", 100 | "- ROOT tutorials grouped by topic: https://root.cern/doc/master/group__Tutorials.html\n", 101 | "\n", 102 | "- ROOT website: https://root.cern\n", 103 | "\n", 104 | "- ROOT GitHub repository: https://github.com/root-project/root\n", 105 | "\n", 106 | "- ROOT training material: https://github.com/root-project/training" 107 | ] 108 | } 109 | ], 110 | "metadata": { 111 | "celltoolbar": "Slideshow", 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.8.6" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } 133 | -------------------------------------------------------------------------------- /course/notebooks/core/01-histograms-and-graphs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 
| "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ROOT Basic Tools: histograms and graphs" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "id": "22001718", 13 | "metadata": { 14 | "slideshow": { 15 | "slide_type": "slide" 16 | } 17 | }, 18 | "source": [ 19 | "# ROOT histograms\n", 20 | "\n", 21 | "[Histogram class documentation](https://root.cern.ch/doc/master/classTH1.html)\n", 22 | "\n", 23 | "ROOT has powerful histogram objects that, among other features, let you produce complex plots and perform fits of arbitrary functions.\n", 24 | "\n", 25 | "Below is an example histogram that can be obtained using one of our tutorials: [Higgs to Four Leptons](https://root.cern/doc/master/df106__HiggsToFourLeptons_8py.html). \n", 26 | "\n", 27 | "`TH1D` is a 1D histogram with floating point double precision y-axis, `TH2I` is a 2D histogram with Integer y-axis, etc.\n", 28 | "\n", 29 | "
" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "2f9a761e", 35 | "metadata": { 36 | "slideshow": { 37 | "slide_type": "slide" 38 | } 39 | }, 40 | "source": [ 41 | "To have something to play with, let's quickly fill a histogram with 5000 normally distributed values:" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "88e3869e", 48 | "metadata": { 49 | "scrolled": true, 50 | "slideshow": { 51 | "slide_type": "slide" 52 | } 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "import ROOT\n", 57 | "import numpy as np" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "776e9551", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "fragment" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "h = ROOT.TH1D(name=\"h\", title=\"My histo\", nbinsx=100, xlow=-5, xup=5)\n", 72 | "h.FillRandom(\"gaus\", ntimes=5000)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "9b4f749c", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "fragment" 81 | } 82 | }, 83 | "source": [ 84 | "To check the full documentation you can always refer to https://root.cern/doc/master (and then switch to the documentation for your particular ROOT version with the drop-down menu at the top of the page)." 
85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "49765d80", 90 | "metadata": { 91 | "slideshow": { 92 | "slide_type": "slide" 93 | } 94 | }, 95 | "source": [ 96 | "## Drawing a histogram" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "2cff5d92", 102 | "metadata": { 103 | "slideshow": { 104 | "slide_type": "fragment" 105 | } 106 | }, 107 | "source": [ 108 | "[Drawing options documentation](https://root.cern.ch/doc/master/classTHistPainter.html)\n", 109 | "\n", 110 | "The link above contains the documentation for the histogram drawing options.\n", 111 | "\n", 112 | "In a notebook, we want to use the `%jsroot on` magic and explicitly draw a `TCanvas`." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "id": "35754314", 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "slide" 122 | } 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "%jsroot on\n", 127 | "c = ROOT.TCanvas()\n", 128 | "#h.SetLineColor(ROOT.kBlue)\n", 129 | "#h.SetFillColor(ROOT.kBlue)\n", 130 | "#h.GetXaxis().SetTitle(\"value\")\n", 131 | "#h.GetYaxis().SetTitle(\"count\")\n", 132 | "#h.SetTitle(\"My histo with latex: p_{t}, #eta, #phi\")\n", 133 | "h.Draw() # draw the histogram on the canvas\n", 134 | "c.Draw() # draw the canvas on the screen" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "id": "77d69e64", 140 | "metadata": { 141 | "slideshow": { 142 | "slide_type": "slide" 143 | } 144 | }, 145 | "source": [ 146 | "## The Unified Histogram Interface (UHI)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "id": "930e9b67", 152 | "metadata": { 153 | "slideshow": { 154 | "slide_type": "fragment" 155 | } 156 | }, 157 | "source": [ 158 | "ROOT histograms implement the [Unified Histogram Interface (UHI)](https://uhi.readthedocs.io/en/latest/), you can find implementation details and examples on the [ROOT documentation](https://root.cern.ch/doc/master/group__uhi__docs.html).\n", 159 | "\n", 
160 | "To quickly try it out, let's fill a `TH1F` using [`numpy`](https://numpy.org/) and plot it with [`mplhep`](https://mplhep.readthedocs.io/en/latest/):" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "id": "a18dfa5c", 167 | "metadata": { 168 | "slideshow": { 169 | "slide_type": "slide" 170 | } 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "h2 = ROOT.TH1F(name=\"h2\", title=\"My UHI histo\", nbinsx=10, xlow=0, xup=1)\n", 175 | "h2[...] = np.random.random(10)\n", 176 | "print(h2.values())" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "57c8303a", 183 | "metadata": { 184 | "slideshow": { 185 | "slide_type": "slide" 186 | } 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "import matplotlib.pyplot as plt\n", 191 | "import mplhep as hep\n", 192 | "\n", 193 | "plt.figure(figsize=(10,7))\n", 194 | "hep.histplot(h2, linewidth=2)\n", 195 | "plt.title(\"My UHI Histo\")\n", 196 | "plt.show()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "id": "d4e28064", 202 | "metadata": { 203 | "slideshow": { 204 | "slide_type": "slide" 205 | } 206 | }, 207 | "source": [ 208 | "# ROOT functions" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "id": "52f1ab7e", 214 | "metadata": { 215 | "slideshow": { 216 | "slide_type": "fragment" 217 | } 218 | }, 219 | "source": [ 220 | "The type that represents an arbitrary one-dimensional mathematical function in ROOT is [TF1](https://root.cern.ch/doc/master/classTF1.html).
\n", 221 | "Similarly, [TF2](https://root.cern.ch/doc/master/classTF2.html) and [TF3](https://root.cern.ch/doc/master/classTF3.html) represent 2-dimensional and 3-dimensional functions.\n", 222 | "\n", 223 | "As an example, let's define and plot a simple surface:" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "5b530644", 230 | "metadata": { 231 | "slideshow": { 232 | "slide_type": "slide" 233 | } 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "f2 = ROOT.TF2(\"f2\", \"sin(x*x - y*y)\", xmin=-2, xmax=2, ymin=-2, ymax=2)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "id": "f8e3712f", 244 | "metadata": { 245 | "slideshow": { 246 | "slide_type": "fragment" 247 | } 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "c = ROOT.TCanvas()\n", 252 | "f2.Draw(\"surf1\") # to get a surface instead of the default contour plot\n", 253 | "c.Draw()" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "id": "b1bc811f", 259 | "metadata": { 260 | "slideshow": { 261 | "slide_type": "slide" 262 | } 263 | }, 264 | "source": [ 265 | "## Fitting a histogram" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "id": "b3e8fb66", 271 | "metadata": { 272 | "slideshow": { 273 | "slide_type": "fragment" 274 | } 275 | }, 276 | "source": [ 277 | "Let's see how to perform simple histogram fits of arbitrary functions. We will need a `TF1` that represents the function we want to use for the fit.\n", 278 | "\n", 279 | "This time we define our `TF1` as a C++ function (note the usage of the `%%cpp` magic to define some C++ inline). 
Here we define a simple gaussian with scale and mean parameters (`par[0]` and `par[1]` respectively):" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "id": "cc620044", 286 | "metadata": { 287 | "slideshow": { 288 | "slide_type": "fragment" 289 | } 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "%%cpp\n", 294 | "\n", 295 | "double gaussian(double *x, double *par) {\n", 296 | " return par[0]*TMath::Exp(-TMath::Power(x[0] - par[1], 2.) / 2.)\n", 297 | " / TMath::Sqrt(2 * TMath::Pi());\n", 298 | "}" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "id": "1b3f9a09", 304 | "metadata": { 305 | "slideshow": { 306 | "slide_type": "slide" 307 | } 308 | }, 309 | "source": [ 310 | "The function signature, that takes an array of coordinates and an array of parameters as inputs, is the generic signature of functions that can be used to construct a `TF1` object:" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "id": "0d68fef2", 317 | "metadata": { 318 | "slideshow": { 319 | "slide_type": "fragment" 320 | } 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "fitFunc = ROOT.TF1(\"fitFunc\", ROOT.gaussian, xmin=-5, xmax=5, npar=2)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": "7b73a168", 330 | "metadata": { 331 | "slideshow": { 332 | "slide_type": "fragment" 333 | } 334 | }, 335 | "source": [ 336 | "Now we fit our `h` histogram with `fitFunc`:" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "id": "5bc200c2", 343 | "metadata": { 344 | "slideshow": { 345 | "slide_type": "fragment" 346 | } 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "res = h.Fit(fitFunc, \"S\") # the \"S\" option makes the function return a fit result object" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "id": "7ea8cdcc", 356 | "metadata": { 357 | "slideshow": { 358 | "slide_type": "slide" 359 | } 360 | }, 361 | "source": [ 
362 | "Drawing the histogram now automatically also shows the fitted function:" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "id": "b6e505d5", 369 | "metadata": { 370 | "scrolled": false, 371 | "slideshow": { 372 | "slide_type": "fragment" 373 | } 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "c2 = ROOT.TCanvas()\n", 378 | "h.Draw()\n", 379 | "c2.Draw()" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "id": "ab7356f9", 385 | "metadata": { 386 | "slideshow": { 387 | "slide_type": "slide" 388 | } 389 | }, 390 | "source": [ 391 | "For the particular case of a gaussian fit, we could also have used the built-in `\"gaus\"` function, as we did when we called `FillRandom` (for the full list of supported expressions see [here](https://root.cern/doc/master/classTFormula.html)):" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "id": "c7ebfd04", 398 | "metadata": { 399 | "slideshow": { 400 | "slide_type": "fragment" 401 | } 402 | }, 403 | "outputs": [], 404 | "source": [ 405 | "res = h.Fit(\"gaus\", \"S\")" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "id": "65f095cd", 412 | "metadata": { 413 | "slideshow": { 414 | "slide_type": "slide" 415 | } 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "c3 = ROOT.TCanvas()\n", 420 | "h.Draw()\n", 421 | "c3.Draw()" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "id": "33f51c9d", 427 | "metadata": { 428 | "slideshow": { 429 | "slide_type": "slide" 430 | } 431 | }, 432 | "source": [ 433 | "For more complex binned and unbinned likelihood fits, check out [RooFit](https://root.cern.ch/download/doc/RooFit_Users_Manual_2.91-33.pdf), a powerful data modelling framework integrated in ROOT." 
434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "id": "2fdcbfd3", 439 | "metadata": { 440 | "slideshow": { 441 | "slide_type": "slide" 442 | } 443 | }, 444 | "source": [ 445 | "## ROOT graphs\n", 446 | "\n", 447 | "[TGraph](https://root.cern/doc/master/classTGraph.html) is a type useful for scatter plots.\n", 448 | "\n", 449 | "Their drawing options are documented [here](https://root.cern/doc/master/classTGraphPainter.html).\n", 450 | "\n", 451 | "As with histograms, the appearance of `TGraph`s can be heavily customized, and they can be fitted with custom functions, etc. " 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": null, 457 | "id": "acc0e4a6", 458 | "metadata": { 459 | "scrolled": false, 460 | "slideshow": { 461 | "slide_type": "slide" 462 | } 463 | }, 464 | "outputs": [], 465 | "source": [ 466 | "x = np.arange(-20, 21, dtype=float)\n", 467 | "y = -x*x\n", 468 | "g = ROOT.TGraph(n=x.size, x=x, y=y)\n", 469 | "\n", 470 | "c4 = ROOT.TCanvas()\n", 471 | "g.SetMarkerStyle(7)\n", 472 | "g.SetLineColor(ROOT.kBlue)\n", 473 | "g.SetTitle(\"My graph\")\n", 474 | "g.Draw()\n", 475 | "c4.Draw()" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "id": "70b3e469", 481 | "metadata": { 482 | "slideshow": { 483 | "slide_type": "slide" 484 | } 485 | }, 486 | "source": [ 487 | "The same graph can be displayed as a bar plot:" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "id": "317a855a", 494 | "metadata": { 495 | "slideshow": { 496 | "slide_type": "fragment" 497 | } 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "c5 = ROOT.TCanvas()\n", 502 | "g.SetTitle(\"My graph\")\n", 503 | "g.SetFillColor(ROOT.kOrange + 1) # base colors can be tweaked by adding/subtracting values to them \n", 504 | "g.Draw(\"AB1\")\n", 505 | "c5.Draw()" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "id": "f2460215", 511 | "metadata": { 512 | "slideshow": { 513 | "slide_type": "slide" 514 | }
515 | }, 516 | "source": [ 517 | "### Plot example: histogram stack\n", 518 | "\n", 519 | "In HEP, we often plot stacked histograms, for example to show the\n", 520 | "contributions of different processes. This can be done with [THStack](https://root.cern.ch/doc/master/classTHStack.html)." 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "id": "4f6b9843", 527 | "metadata": { 528 | "slideshow": { 529 | "slide_type": "-" 530 | } 531 | }, 532 | "outputs": [], 533 | "source": [ 534 | "f1 = ROOT.TF1(\"f1\", \"gaus\", -4.0, 4.0)\n", 535 | "\n", 536 | "histos = [ROOT.TH1D(f\"h{i}\", \"x\", 64, -4.0, 4.0) for i in range(3)]\n", 537 | "\n", 538 | "hs = ROOT.THStack(\"hs\",\"\")\n", 539 | "hs.SetTitle(\";x;Events\")\n", 540 | "\n", 541 | "colors = [46, 30, 38]\n", 542 | " \n", 543 | "for i in range(len(histos)):\n", 544 | " h = histos[i]\n", 545 | " f1.SetParameters(1.0, i - 1, 1.0)\n", 546 | " h.FillRandom(\"f1\", 100000)\n", 547 | " h.SetFillColor(colors[i])\n", 548 | " hs.Add(h)\n", 549 | "\n", 550 | "c6 = ROOT.TCanvas()\n", 551 | "hs.Draw()\n", 552 | "c6.Draw()" 553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "id": "22ad4ed4", 558 | "metadata": { 559 | "slideshow": { 560 | "slide_type": "slide" 561 | } 562 | }, 563 | "source": [ 564 | "### Plot example: efficiency curves\n", 565 | "\n", 566 | "Another common workflow is to draw efficiency curves with [TEfficiency](https://root.cern.ch/doc/master/classTEfficiency.html), which also gives uncertainties." 
567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "id": "319a70b7", 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "h_pass = ROOT.TH1D(\"h_pass\", \"My histogram\", 50, 0, 100.0)\n", 577 | "h_total = ROOT.TH1D(\"h_total\", \"My histogram\", 50, 0, 100.0)\n", 578 | "\n", 579 | "f_gaus = ROOT.TF1(\"f_gaus\", \"gaus\", 0, 100.0)\n", 580 | "\n", 581 | "f_gaus.SetParameters(1.0, 56.0, 20.0)\n", 582 | "h_pass.FillRandom(\"f_gaus\", 40000)\n", 583 | "h_pass.SetLineColor(ROOT.kRed)\n", 584 | "f_gaus.SetParameters(1.0, 50.0, 20.0)\n", 585 | "h_total.FillRandom(\"f_gaus\", 100000)" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "id": "ac34e721", 592 | "metadata": { 593 | "slideshow": { 594 | "slide_type": "fragment" 595 | } 596 | }, 597 | "outputs": [], 598 | "source": [ 599 | "teff = ROOT.TEfficiency(h_pass,h_total)\n", 600 | "\n", 601 | "c7 = ROOT.TCanvas(\"rf101_basics\", \"rf101_basics\", 800, 400)\n", 602 | "c7.Divide(2)\n", 603 | "c7.cd(1)\n", 604 | "h_total.Draw()\n", 605 | "h_pass.Draw(\"SAME\")\n", 606 | "c7.cd(2)\n", 607 | "teff.Draw()\n", 608 | "c7.Draw()" 609 | ] 610 | } 611 | ], 612 | "metadata": { 613 | "celltoolbar": "Slideshow", 614 | "kernelspec": { 615 | "display_name": "Python 3 (ipykernel)", 616 | "language": "python", 617 | "name": "python3" 618 | }, 619 | "language_info": { 620 | "codemirror_mode": { 621 | "name": "ipython", 622 | "version": 3 623 | }, 624 | "file_extension": ".py", 625 | "mimetype": "text/x-python", 626 | "name": "python", 627 | "nbconvert_exporter": "python", 628 | "pygments_lexer": "ipython3", 629 | "version": "3.11.9" 630 | } 631 | }, 632 | "nbformat": 4, 633 | "nbformat_minor": 5 634 | } 635 | -------------------------------------------------------------------------------- /course/notebooks/core/02-tfile-read-write-ttree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | 
{ 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# The ROOT file" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "* With ROOT, objects can be written to files\n", 23 | "\n", 24 | "* ROOT provides its own file class, [TFile](https://root.cern/doc/master/classTFile.html), to interact with these files\n", 25 | "\n", 26 | "* ROOT files are _binary_ and can be transparently _compressed_ to reduce disk usage\n", 27 | "\n", 28 | "* ROOT files have a logical “file-system-like” structure\n", 29 | "\n", 30 | " * E.g. a directory hierarchy" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "slideshow": { 37 | "slide_type": "slide" 38 | } 39 | }, 40 | "source": [ 41 | "Let's start with importing ROOT as usual" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "slideshow": { 49 | "slide_type": "fragment" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "import ROOT" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": { 60 | "slideshow": { 61 | "slide_type": "slide" 62 | } 63 | }, 64 | "source": [ 65 | "\n", 66 | "This is how you create a `TFile`:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "slideshow": { 74 | "slide_type": "fragment" 75 | } 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "f = ROOT.TFile(\"my_file.root\", \"RECREATE\")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": { 85 | "slideshow": { 86 | "slide_type": "fragment" 87 | } 88 | }, 89 | "source": [ 90 | "
" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "slideshow": { 97 | "slide_type": "fragment" 98 | } 99 | }, 100 | "source": [ 101 | "and how you close it (note that when `f` is destroyed, the file is closed automatically):" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "slideshow": { 109 | "slide_type": "fragment" 110 | } 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "f.Close()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "slide" 122 | } 123 | }, 124 | "source": [ 125 | "Note that you can use the Python context manager syntax for opening a TFile, which will automatically run the necessary cleanup for you:" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "slideshow": { 133 | "slide_type": "fragment" 134 | } 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "with ROOT.TFile(\"my_file.root\", \"RECREATE\") as myfile:\n", 139 | " # do something with the file inside the scope\n", 140 | " pass" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "slide" 148 | } 149 | }, 150 | "source": [ 151 | "With the following example we demonstrate how to write an object inside a file:" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "slideshow": { 159 | "slide_type": "fragment" 160 | } 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "with ROOT.TFile(\"my_file.root\", \"RECREATE\") as f:\n", 165 | " h = ROOT.TH1D(\"my_histo\", \"Example histogram\", 100, -4, 4)\n", 166 | " f.WriteObject(h, h.GetName())" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": { 172 | "slideshow": { 173 | "slide_type": "fragment" 174 | } 175 | }, 176 | "source": [ 177 | "\n", 178 | "The `\"my_histo\"` argument of the 
`TH1D` constructor is the name of the histogram, and it is also how it will be identified inside the file, we'll see that in a minute." 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "slideshow": { 185 | "slide_type": "slide" 186 | } 187 | }, 188 | "source": [ 189 | "We should now have a file called `my_file.root` in the current directory. We will check that by using the `%%bash` magic, which allows us to run bash commands from a cell:" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "slideshow": { 197 | "slide_type": "fragment" 198 | } 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "%%bash\n", 203 | "ls -l my_file.root" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "slideshow": { 210 | "slide_type": "fragment" 211 | } 212 | }, 213 | "source": [ 214 | "We can also use the `rootls` command to inspect the contents of the ROOT file. See how the file contains an object called `my_histo` of type `TH1D`." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "slideshow": { 222 | "slide_type": "fragment" 223 | } 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "%%bash\n", 228 | "rootls -l my_file.root" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": { 234 | "slideshow": { 235 | "slide_type": "slide" 236 | } 237 | }, 238 | "source": [ 239 | "Finally, let's see how we can programmatically retrieve the histogram we just wrote in the file. \n", 240 | "\n", 241 | "We can access the histogram by its name using `TFile::Get()`." 
242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": { 248 | "slideshow": { 249 | "slide_type": "fragment" 250 | } 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "with ROOT.TFile(\"my_file.root\") as f: # READ is the default mode\n", 255 | " h = f.Get(\"my_histo\")\n", 256 | " print(h)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": { 262 | "slideshow": { 263 | "slide_type": "slide" 264 | } 265 | }, 266 | "source": [ 267 | "# The HEP dataset\n", 268 | "\n", 269 | "High Energy Physics data is made of many statistically independent collision events. \n", 270 | "\n", 271 | "Laying out data as an \"event class\" and then serialising and writing `N` instances of that class to a file would be very inefficient. \n", 272 | "\n", 273 | "In ROOT, a dataset is organised in columns that can store elements of any C++ type:\n", 274 | "* fundamental types: `int`, `float`\n", 275 | "* C++ standard collections: `std::vector`, `std::map`\n", 276 | "* User created C++ classes\n", 277 | "\n", 278 | "The ROOT dataset is represented by the `TTree` class and is often simply called a tree. Columns in the dataset are instances of the `TBranch` class (often referred to as \"branches\")." 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "slideshow": { 285 | "slide_type": "subslide" 286 | } 287 | }, 288 | "source": [ 289 | "
" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": { 295 | "slideshow": { 296 | "slide_type": "subslide" 297 | } 298 | }, 299 | "source": [ 300 | "- A `TTree` dataset can be written to a `TFile` (just like any other C++ object). \n", 301 | "\n", 302 | "- The ROOT format is logically and physically (on disk) a columnar format. \n", 303 | "\n", 304 | "- Different columns can be read from disk independently. \n", 305 | "\n", 306 | "- This translates into faster IO performance with respect to other dataset formats (HDF5, SQL)." 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "slideshow": { 314 | "slide_type": "fragment" 315 | } 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "%%bash\n", 320 | "rootls -l ../../data/example_file.root" 321 | ] 322 | } 323 | ], 324 | "metadata": { 325 | "celltoolbar": "Slideshow", 326 | "kernelspec": { 327 | "display_name": "Python 3 (ipykernel)", 328 | "language": "python", 329 | "name": "python3" 330 | }, 331 | "language_info": { 332 | "codemirror_mode": { 333 | "name": "ipython", 334 | "version": 3 335 | }, 336 | "file_extension": ".py", 337 | "mimetype": "text/x-python", 338 | "name": "python", 339 | "nbconvert_exporter": "python", 340 | "pygments_lexer": "ipython3", 341 | "version": "3.11.9" 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 4 346 | } 347 | -------------------------------------------------------------------------------- /course/notebooks/core/03-rdataframe-basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# ROOT RDataFrame\n", 12 | "\n", 13 | "[RDataFrame documentation](https://root.cern/doc/master/classROOT_1_1RDataFrame.html)\n", 14 | "\n", 15 | "- RDF is ROOT's high-level analysis interface. 
\n", 16 | "\n", 17 | "- Users define their analysis as a sequence of operations to be performed on the data-frame object; \n", 18 | "\n", 19 | " - the framework takes care of the management of the loop over entries as well as low-level details such as I/O and parallelisation.\n", 20 | "\n", 21 | "- RDataFrame provides methods to perform most common operations required by ROOT analyses: \n", 22 | "\n", 23 | " - at the same time, users can just as easily specify custom code that will be executed in the event loop.\n", 24 | "" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": { 30 | "slideshow": { 31 | "slide_type": "slide" 32 | } 33 | }, 34 | "source": [ 35 | "# HEP data analysis with RDataFrame\n", 36 | "RDataFrame allows reading and writing trees, aiming at making HEP analysis easy to write and fast to perform." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "subslide" 45 | } 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import ROOT\n", 50 | "\n", 51 | "treename = \"dataset\"\n", 52 | "filename = \"../../data/example_file.root\"\n", 53 | "df = ROOT.RDataFrame(treename, filename)\n", 54 | "\n", 55 | "print(f\"Columns in the dataset: {df.GetColumnNames()}\")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "slideshow": { 62 | "slide_type": "subslide" 63 | } 64 | }, 65 | "source": [ 66 | "Now we can `Define` new quantities, `Filter` rows based on custom expressions and retrieve some data aggregations such as a `Count` and a `Mean`:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "scrolled": true, 74 | "slideshow": { 75 | "slide_type": "fragment" 76 | } 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "def1 = df.Define(\"c\", \"a+b\")\n", 81 | "\n", 82 | "fil1 = def1.Filter(\"c < 0.5\")\n", 83 | "\n", 84 | "count = fil1.Count()\n", 85 | "mean = fil1.Mean(\"c\")\n", 86 | 
"display = fil1.Display([\"a\",\"b\",\"c\"])\n", 87 | "\n", 88 | "print(f\"Number of rows after filter: {count.GetValue()}\")\n", 89 | "print(f\"Mean of column c after filter: {mean.GetValue()}\")\n", 90 | "print(\"Dataset contents:\")\n", 91 | "display.Print()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "slideshow": { 98 | "slide_type": "slide" 99 | } 100 | }, 101 | "source": [ 102 | "# Histograms with RDataFrame\n", 103 | "RDataFrame helps you streamline the creation and filling of histogram objects from your data. \n", 104 | "\n", 105 | "For example:" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "slideshow": { 113 | "slide_type": "fragment" 114 | } 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "%jsroot on\n", 119 | "c = ROOT.TCanvas()\n", 120 | "h = df.Histo1D(\"vec1\")\n", 121 | "h.Draw()\n", 122 | "c.Draw()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "slideshow": { 129 | "slide_type": "subslide" 130 | } 131 | }, 132 | "source": [ 133 | "- `Histo1D` will create a one-dimensional histogram holding `double` values. \n", 134 | "\n", 135 | "- `Histo{2,3}D` do the same in higher dimensions. \n", 136 | "\n", 137 | "- These operations also accept a tuple with the same arguments that would be passed to the equivalent histogram object constructors. 
\n", 138 | "\n", 139 | "- For example:" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "subslide" 148 | } 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "histo_name = \"histo_name\"\n", 153 | "histo_title = \"histo_title\"\n", 154 | "nbinsx = 100\n", 155 | "xlow = -10\n", 156 | "xup = 10\n", 157 | "\n", 158 | "# The traditional TH1D constructor\n", 159 | "# ROOT.TH1D(histo_name, histo_title, nbinsx, xlow, xup)\n", 160 | "\n", 161 | "# With RDataFrame\n", 162 | "c = ROOT.TCanvas()\n", 163 | "h = df.Histo1D((histo_name, histo_title, nbinsx, xlow, xup), \"vec1\")\n", 164 | "h.Draw()\n", 165 | "c.Draw()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": { 171 | "slideshow": { 172 | "slide_type": "slide" 173 | } 174 | }, 175 | "source": [ 176 | "# Think about data-flow\n", 177 | "RDataFrame is built with a modular and flexible workflow in mind, summarised as follows:\n", 178 | "\n", 179 | "* build a data-frame object by specifying your data-set\n", 180 | "* apply a series of transformations to your data\n", 181 | " * filter (e.g. apply some cuts) or\n", 182 | " * define a new column (e.g. the result of an expensive computation on columns)\n", 183 | "* apply actions to the transformed data to produce results (e.g. fill a histogram)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": { 189 | "slideshow": { 190 | "slide_type": "fragment" 191 | } 192 | }, 193 | "source": [ 194 | "### Important Note!\n", 195 | "Make sure to **book all transformations and actions before** you access the contents of any of the results: this lets RDataFrame accumulate work and then produce all results at the same time, upon first access to any of them." 
196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "slideshow": { 203 | "slide_type": "subslide" 204 | } 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "df_wrong = ROOT.RDataFrame(treename, filename)\n", 209 | "\n", 210 | "h_a = df_wrong.Histo1D(\"a\")\n", 211 | "h_a_val = h_a.GetValue()\n", 212 | "\n", 213 | "h_b = df_wrong.Histo1D(\"b\")\n", 214 | "h_b_val = h_b.GetValue()\n", 215 | "\n", 216 | "h_vec1 = df_wrong.Histo1D(\"vec1\")\n", 217 | "h_vec1_val = h_vec1.GetValue()\n", 218 | "\n", 219 | "print(f\"The dataset was processed {df_wrong.GetNRuns()} times.\")\n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "slideshow": { 227 | "slide_type": "subslide" 228 | } 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "df_good = ROOT.RDataFrame(treename, filename)\n", 233 | "\n", 234 | "h_a = df_good.Histo1D(\"a\")\n", 235 | "h_b = df_good.Histo1D(\"b\")\n", 236 | "h_vec1 = df_good.Histo1D(\"vec1\")\n", 237 | "\n", 238 | "h_a_val = h_a.GetValue()\n", 239 | "h_b_val = h_b.GetValue()\n", 240 | "h_vec1_val = h_vec1.GetValue()\n", 241 | "\n", 242 | "print(f\"The dataset was processed {df_good.GetNRuns()} time.\")" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": { 248 | "slideshow": { 249 | "slide_type": "slide" 250 | } 251 | }, 252 | "source": [ 253 | "# Operation categories in RDataFrame\n", 254 | "There are 3 main types of operations you can perform on RDataFrames:" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "slideshow": { 262 | "slide_type": "skip" 263 | } 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "%%html\n", 268 | "" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": { 277 | "slideshow": { 278 | "slide_type": "subslide" 279 | } 280 | }, 281 | "source": [ 282 | "**Transformations**: manipulate the dataset, return a 
modified RDataFrame for further processing.\n", 283 | "\n", 284 | "| Transformation | Description |\n", 285 | "|-------------------|------------------------------------------------------------|\n", 286 | "| Alias() | Introduce an alias for a particular column name. |\n", 287 | "| Define() | Creates a new column in the dataset. |\n", 288 | "| Filter() | Filter rows based on user-defined conditions. |" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": { 294 | "slideshow": { 295 | "slide_type": "subslide" 296 | } 297 | }, 298 | "source": [ 299 | "**Actions**: aggregate (parts of) the dataset into a result.\n", 300 | "\n", 301 | "| Action | Description |\n", 302 | "|------------------------------------|--------------------------------------------------------------------------------------|\n", 303 | "| Count() | Return the number of events processed. |\n", 304 | "| Display() | Provides a printable object representing the dataset contents. |\n", 305 | "| Graph() | Fills a TGraph with the two columns provided. |\n", 306 | "| Histo1D(), Histo2D(), Histo3D() | Fill a one-, two-, three-dimensional histogram with the processed column values. |\n", 307 | "| Max(), Min() | Return the maximum(minimum) of processed column values. |\n", 308 | "| Snapshot() | Writes processed data-set to a new TTree. |\n", 309 | "| ... | ... " 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": { 315 | "slideshow": { 316 | "slide_type": "subslide" 317 | } 318 | }, 319 | "source": [ 320 | "**Queries**: these methods query information about your dataset and the RDataFrame status.\n", 321 | "\n", 322 | "| Operation | Description |\n", 323 | "|---------------------|------------------------------------------------------------------------------------------|\n", 324 | "| GetColumnNames() | Get the names of all the available columns of the dataset. |\n", 325 | "| GetColumnType() | Return the type of a given column as a string. 
|\n", 326 | "| SaveGraph() | Export the computation graph of an RDataFrame in graphviz format for easy inspection. |\n", 327 | "| ... | ... |" 328 | ] 329 | } 330 | ], 331 | "metadata": { 332 | "celltoolbar": "Slideshow", 333 | "kernelspec": { 334 | "display_name": "Python 3", 335 | "language": "python", 336 | "name": "python3" 337 | }, 338 | "language_info": { 339 | "codemirror_mode": { 340 | "name": "ipython", 341 | "version": 3 342 | }, 343 | "file_extension": ".py", 344 | "mimetype": "text/x-python", 345 | "name": "python", 346 | "nbconvert_exporter": "python", 347 | "pygments_lexer": "ipython3", 348 | "version": "3.11.9" 349 | } 350 | }, 351 | "nbformat": 4, 352 | "nbformat_minor": 2 353 | } 354 | -------------------------------------------------------------------------------- /course/notebooks/core/04-rdataframe-collections.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6c20dcc6", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "slide" 9 | } 10 | }, 11 | "source": [ 12 | "# Working with collections and object selections\n", 13 | "\n", 14 | "- RDataFrame reads collections as the special type [ROOT::RVec](https://root.cern/doc/master/classROOT_1_1VecOps_1_1RVec.html) - e.g. a branch containing an array of floating point numbers can be read as a `ROOT::RVec<float>`.\n", 15 | "\n", 16 | "- C-style arrays (with variable or static size), `std::vector`s and many other collection types can be read this way. \n", 17 | "\n", 18 | "- When reading ROOT data, column values of type `ROOT::RVec` perform no copy of the underlying array.\n", 19 | "\n", 20 | "- `RVec` is a container similar to `std::vector` (and can be used just like a `std::vector`) but it also offers a rich interface to operate on the array elements in a vectorised fashion, similarly to Python's NumPy arrays."
21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "id": "25b99067", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "fragment" 30 | } 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import ROOT\n", 35 | "\n", 36 | "treename = \"myDataset\"\n", 37 | "filename = \"../../data/collections_dataset.root\"\n", 38 | "df = ROOT.RDataFrame(treename, filename)\n", 39 | "\n", 40 | "print(f\"Columns in the dataset: {df.GetColumnNames()}\")" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "0f3633ae", 46 | "metadata": { 47 | "slideshow": { 48 | "slide_type": "subslide" 49 | } 50 | }, 51 | "source": [ 52 | "To quickly inspect the data we can export it as a dictionary of `numpy` arrays thanks to the `AsNumpy` RDataFrame method. \n", 53 | "\n", 54 | "Note that for each row, `E` is an array of values:" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "id": "b4664f42", 61 | "metadata": { 62 | "scrolled": true, 63 | "slideshow": { 64 | "slide_type": "fragment" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "npy_dict = df.AsNumpy([\"E\"])\n", 70 | "\n", 71 | "for row, vec in enumerate(npy_dict[\"E\"]):\n", 72 | " print(f\"\\nRow {row} contains:\\n{vec}\")" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "b896f202", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "subslide" 81 | } 82 | }, 83 | "source": [ 84 | "### Define a new column with operations on RVecs" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "id": "4f140a78", 91 | "metadata": { 92 | "slideshow": { 93 | "slide_type": "fragment" 94 | } 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "df1 = df.Define(\"good_pt\", \"sqrt(px*px + py*py)[E>100]\")" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "id": "4ca3b396", 104 | "metadata": { 105 | "slideshow": { 106 | "slide_type": "fragment" 107 | } 108 | }, 109 | "source": [ 110 | "`sqrt(px*px + 
py*py)[E>100]`:\n", 111 | "- `px`, `py` and `E` are the columns, the elements of those columns are `RVec`s\n", 112 | "\n", 113 | "- Operations on `RVec`s, such as sum, product, sqrt, preserve the dimensionality of the array\n", 114 | "\n", 115 | "- `[E>100]` selects the elements of the array that satisfy the condition\n", 116 | "\n", 117 | "- `E > 100`: boolean expressions on `RVec`s such as `E > 100` return a mask, that is an array with information which values pass the selection (e.g. `[0, 1, 0, 0]` if only the second element satisfies the condition)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "e283340e", 123 | "metadata": { 124 | "slideshow": { 125 | "slide_type": "subslide" 126 | } 127 | }, 128 | "source": [ 129 | "### Now we can plot the newly defined column values in a histogram" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "9795d4a7", 136 | "metadata": { 137 | "slideshow": { 138 | "slide_type": "fragment" 139 | } 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "c = ROOT.TCanvas()\n", 144 | "h = df1.Histo1D((\"pt\", \"pt\", 16, 0, 4), \"good_pt\")\n", 145 | "h.Draw()\n", 146 | "c.Draw()" 147 | ] 148 | } 149 | ], 150 | "metadata": { 151 | "celltoolbar": "Slideshow", 152 | "kernelspec": { 153 | "display_name": "Python 3", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.9.6" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 5 172 | } 173 | -------------------------------------------------------------------------------- /course/notebooks/core/05-rdataframe-features.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | 
{ 4 | "cell_type": "markdown", 5 | "id": "cd70edc7", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "slide" 9 | } 10 | }, 11 | "source": [ 12 | "# Save datasets to a ROOT file after processing\n", 13 | "\n", 14 | "With RDataFrame, you can read your dataset, add new columns with processed values and finally use `Snapshot` to save the resulting data to a ROOT file in TTree format." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "id": "308c56f0", 21 | "metadata": { 22 | "slideshow": { 23 | "slide_type": "subslide" 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import ROOT\n", 29 | "\n", 30 | "df = ROOT.RDataFrame(\"dataset\",\"../../data/example_file.root\")\n", 31 | "df1 = df.Define(\"c\",\"a+b\")\n", 32 | "\n", 33 | "out_treename = \"outtree\"\n", 34 | "out_filename = \"outtree.root\"\n", 35 | "out_columns = [\"a\",\"b\",\"c\"]\n", 36 | "snapdf = df1.Snapshot(out_treename, out_filename, out_columns)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "ecaaed15", 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "subslide" 45 | } 46 | }, 47 | "source": [ 48 | "We can now check that the dataset was correctly stored in a file:" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "7ca9de7b", 55 | "metadata": { 56 | "slideshow": { 57 | "slide_type": "fragment" 58 | } 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "%%bash\n", 63 | "rootls -lt outtree.root" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "55b7bc7f", 69 | "metadata": { 70 | "slideshow": { 71 | "slide_type": "subslide" 72 | } 73 | }, 74 | "source": [ 75 | "Result of a Snapshot is still an RDataFrame that can be further used:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "23f46a0b", 82 | "metadata": { 83 | "slideshow": { 84 | "slide_type": "fragment" 85 | } 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "snapdf.Display().Print()" 90 | 
] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "d98928de", 95 | "metadata": { 96 | "slideshow": { 97 | "slide_type": "slide" 98 | } 99 | }, 100 | "source": [ 101 | "# Cutflow reports\n", 102 | "Filters applied to the dataset can be given a name. The `Report` method will gather information about filter efficiency and show the data flow between subsequent cuts on the original dataset.\n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "id": "d7610f52", 109 | "metadata": { 110 | "slideshow": { 111 | "slide_type": "fragment" 112 | } 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "df = ROOT.RDataFrame(\"sig_tree\", \"https://root.cern/files/Higgs_data.root\")\n", 117 | "\n", 118 | "filter1 = df.Filter(\"lepton_eta > 0\", \"Lepton eta cut\")\n", 119 | "filter2 = filter1.Filter(\"lepton_phi < 1\", \"Lepton phi cut\")\n", 120 | "\n", 121 | "rep = df.Report()\n", 122 | "rep.Print()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "id": "f3be5b9d", 128 | "metadata": { 129 | "slideshow": { 130 | "slide_type": "slide" 131 | } 132 | }, 133 | "source": [ 134 | "# Using C++ functions in Python\n", 135 | "- We still want to perform complex operations in Python but plain Python code is prone to be slow and not thread-safe. \n", 136 | "\n", 137 | "- Instead, you can inject C++ functions that will do the work in your event loop during runtime. \n", 138 | "\n", 139 | "- This mechanism uses the C++ interpreter `cling` shipped with ROOT, making this possible in a single line of code. \n", 140 | "\n", 141 | "- Let's start by defining a function that will allow us to change the type of the RDataFrame dataset entry numbers (stored in the special column \"rdfentry_\") from `unsigned long long` to `float`."
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "9a1bcee4", 148 | "metadata": { 149 | "slideshow": { 150 | "slide_type": "fragment" 151 | } 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "%%cpp\n", 156 | "\n", 157 | "float asfloat(unsigned long long entrynumber){\n", 158 | " return entrynumber;\n", 159 | "}" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "1b8f4bd1", 165 | "metadata": { 166 | "slideshow": { 167 | "slide_type": "subslide" 168 | } 169 | }, 170 | "source": [ 171 | "Then let's define another function that takes a `float` value and computes its square." 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "6d3a8b4f", 178 | "metadata": { 179 | "slideshow": { 180 | "slide_type": "fragment" 181 | } 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "%%cpp\n", 186 | "\n", 187 | "float square(float val){\n", 188 | " return val * val;\n", 189 | "}" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "id": "90522e44", 195 | "metadata": { 196 | "slideshow": { 197 | "slide_type": "subslide" 198 | } 199 | }, 200 | "source": [ 201 | "And now let's use these functions with RDataFrame! 
\n", 202 | "\n", 203 | "We start by creating an empty RDataFrame with 100 consecutive entries and defining new columns on it:" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "0edd70d3", 210 | "metadata": { 211 | "slideshow": { 212 | "slide_type": "fragment" 213 | } 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "# Create a new RDataFrame from scratch with 100 consecutive entries\n", 218 | "df = ROOT.RDataFrame(100)\n", 219 | "\n", 220 | "# Create a new column using the previously declared C++ functions\n", 221 | "df1 = df.Define(\"a\", \"asfloat(rdfentry_)\")\n", 222 | "df2 = df1.Define(\"b\", \"square(a)\")" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "id": "5b1005d7", 228 | "metadata": { 229 | "slideshow": { 230 | "slide_type": "subslide" 231 | } 232 | }, 233 | "source": [ 234 | "We can now plot the values of the columns in a graph:" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "id": "18a35cd0", 241 | "metadata": { 242 | "slideshow": { 243 | "slide_type": "fragment" 244 | } 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "# Show the two columns created in a graph\n", 249 | "c = ROOT.TCanvas()\n", 250 | "graph = df2.Graph(\"a\",\"b\")\n", 251 | "graph.SetMarkerStyle(20)\n", 252 | "graph.SetMarkerSize(0.5)\n", 253 | "graph.SetMarkerColor(ROOT.kBlue)\n", 254 | "graph.SetTitle(\"My graph\")\n", 255 | "graph.Draw(\"AP\")\n", 256 | "c.Draw()" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "id": "072ae85d", 262 | "metadata": { 263 | "slideshow": { 264 | "slide_type": "slide" 265 | } 266 | }, 267 | "source": [ 268 | "# Using all cores of your machine with multi-threaded RDataFrame\n", 269 | "- RDataFrame can transparently perform multi-threaded event loops to speed up the execution of its actions. 
\n", 270 | "\n", 271 | "- Users have to call `ROOT::EnableImplicitMT()` before constructing the RDataFrame object to indicate that it should take advantage of a pool of worker threads. \n", 272 | "\n", 273 | "- Each worker thread processes a distinct subset of entries, and their partial results are merged before returning the final values to the user.\n", 274 | "\n", 275 | "- RDataFrame operations such as Histo1D or Snapshot are guaranteed to work correctly in multi-thread event loops. \n", 276 | "\n", 277 | "- User-defined expressions, such as strings or lambdas passed to `Filter`, `Define`, `Foreach`, `Reduce` or `Aggregate` will have to be thread-safe, i.e. it should be possible to call them concurrently from different threads." 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "f2d4528b", 284 | "metadata": { 285 | "slideshow": { 286 | "slide_type": "subslide" 287 | } 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "%%time\n", 292 | "# Get a first baseline measurement\n", 293 | "\n", 294 | "treename = \"Events\"\n", 295 | "filename = \"root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/Run2012BC_DoubleMuParked_Muons.root\"\n", 296 | "df = ROOT.RDataFrame(treename, filename)\n", 297 | "\n", 298 | "df.Sum(\"nMuon\").GetValue()" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "id": "ec2afbc4", 305 | "metadata": { 306 | "slideshow": { 307 | "slide_type": "subslide" 308 | } 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "%%time\n", 313 | "# Activate multithreading capabilities\n", 314 | "# By default takes all available cores on the machine\n", 315 | "ROOT.EnableImplicitMT()\n", 316 | "\n", 317 | "treename = \"Events\"\n", 318 | "filename = \"root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/Run2012BC_DoubleMuParked_Muons.root\"\n", 319 | "df = ROOT.RDataFrame(treename, filename)\n", 320 | "\n", 321 | 
"df.Sum(\"nMuon\").GetValue()\n", 322 | "\n", 323 | "# Disable implicit multithreading when done\n", 324 | "ROOT.DisableImplicitMT()" 325 | ] 326 | } 327 | ], 328 | "metadata": { 329 | "celltoolbar": "Slideshow", 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.9.6" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 5 350 | } 351 | -------------------------------------------------------------------------------- /course/notebooks/core/06-rdataframe-advanced.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "60ae38e3", 7 | "metadata": { 8 | "slideshow": { 9 | "slide_type": "slide" 10 | } 11 | }, 12 | "source": [ 13 | "# RDataFrame advanced features\n", 14 | "There are many more features available with the RDataFrame that might serve your analysis needs!" 15 | ] 16 | }, 17 | { 18 | "attachments": {}, 19 | "cell_type": "markdown", 20 | "id": "2e8e9ace", 21 | "metadata": { 22 | "slideshow": { 23 | "slide_type": "slide" 24 | } 25 | }, 26 | "source": [ 27 | "## Working with `numpy` arrays\n", 28 | "- RDataFrame offers interoperability with `numpy` arrays. \n", 29 | "\n", 30 | "- It can be created from a dictionary of such arrays and it can also export its contents to the same format. \n", 31 | "\n", 32 | "- All operations are available also when using the `numpy`-based dataset.\n", 33 | "\n", 34 | "- **Note:** this support is limited to one-dimensional numpy arrays, which are directly mapped to columns in the RDataFrame." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "128e586a", 41 | "metadata": { 42 | "slideshow": { 43 | "slide_type": "subslide" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy\n", 49 | "import ROOT\n", 50 | "\n", 51 | "np_dict = {colname: numpy.random.rand(100) for colname in [\"a\",\"b\",\"c\"]}\n", 52 | "\n", 53 | "df = ROOT.RDF.FromNumpy(np_dict)\n", 54 | "\n", 55 | "print(f\"Columns in the RDataFrame: {df.GetColumnNames()}\")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "dde693ee", 62 | "metadata": { 63 | "slideshow": { 64 | "slide_type": "subslide" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "co = df.Count()\n", 70 | "m_a = df.Mean(\"a\")\n", 71 | "\n", 72 | "fil1 = df.Filter(\"c < 0.7\")\n", 73 | "def1 = fil1.Define(\"d\", \"a+b+c\")\n", 74 | "h = def1.Histo1D(\"d\")\n", 75 | "\n", 76 | "c = ROOT.TCanvas()\n", 77 | "h.Draw()\n", 78 | "\n", 79 | "print(f\"Number of rows in the dataset: {co.GetValue()}\")\n", 80 | "print(f\"Average value of column a: {m_a.GetValue()}\")\n", 81 | "c.Draw()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "47a18efa", 88 | "metadata": { 89 | "scrolled": true, 90 | "slideshow": { 91 | "slide_type": "subslide" 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "# Export the modified dataframe to a dictionary of numpy arrays\n", 97 | "\n", 98 | "np_dict_mod = def1.AsNumpy()\n", 99 | "\n", 100 | "np_dict_mod" 101 | ] 102 | }, 103 | { 104 | "attachments": {}, 105 | "cell_type": "markdown", 106 | "id": "b3fbd633", 107 | "metadata": { 108 | "slideshow": { 109 | "slide_type": "slide" 110 | } 111 | }, 112 | "source": [ 113 | "## Multiple concurrent RDataFrame runs\n", 114 | "If your analysis needs multiple RDataFrames to run (for example multiple dataset samples, data vs simulation etc.), make use of `ROOT.RDF.RunGraphs` " 115 | ] 116 | }, 117 | { 118 | "cell_type": 
"code", 119 | "execution_count": null, 120 | "id": "f342906a", 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "subslide" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "ROOT.EnableImplicitMT()\n", 129 | "treename1 = \"myDataset\"\n", 130 | "filename1 = \"../../data/collections_dataset.root\"\n", 131 | "treename2 = \"dataset\"\n", 132 | "filename2 = \"../../data/example_file.root\"\n", 133 | "\n", 134 | "df1 = ROOT.RDataFrame(treename1, filename1)\n", 135 | "df2 = ROOT.RDataFrame(treename2, filename2)\n", 136 | "h1 = df1.Histo1D(\"px\")\n", 137 | "h2 = df2.Histo1D(\"a\")\n", 138 | "\n", 139 | "\n", 140 | "ROOT.RDF.RunGraphs((h1, h2))\n", 141 | "ROOT.DisableImplicitMT()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "1b715694", 148 | "metadata": { 149 | "slideshow": { 150 | "slide_type": "subslide" 151 | } 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "c = ROOT.TCanvas()\n", 156 | "h1.Draw()\n", 157 | "c.Draw()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "795a5420", 164 | "metadata": { 165 | "slideshow": { 166 | "slide_type": "subslide" 167 | } 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "c = ROOT.TCanvas()\n", 172 | "h2.Draw()\n", 173 | "c.Draw()" 174 | ] 175 | }, 176 | { 177 | "attachments": {}, 178 | "cell_type": "markdown", 179 | "id": "799b981e", 180 | "metadata": { 181 | "slideshow": { 182 | "slide_type": "slide" 183 | } 184 | }, 185 | "source": [ 186 | "# Re-defining values of a column\n", 187 | "\n", 188 | "Sometimes you might want to keep a column with the same name, but modify its values. This is available via the `Redefine` method." 
189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "id": "374e35a3", 195 | "metadata": { 196 | "slideshow": { 197 | "slide_type": "subslide" 198 | } 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "df = ROOT.RDataFrame(5).Define(\"x\", \"42\")\n", 203 | "df.Display().Print()" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "a7a7a553", 210 | "metadata": { 211 | "slideshow": { 212 | "slide_type": "subslide" 213 | } 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "df = df.Redefine(\"x\", \"x * 10\")\n", 218 | "df.Display().Print()" 219 | ] 220 | }, 221 | { 222 | "attachments": {}, 223 | "cell_type": "markdown", 224 | "id": "d820208f", 225 | "metadata": { 226 | "slideshow": { 227 | "slide_type": "subslide" 228 | } 229 | }, 230 | "source": [ 231 | "Notice how we could use the previous values of the `\"x\"` column to define the new values." 232 | ] 233 | }, 234 | { 235 | "attachments": {}, 236 | "cell_type": "markdown", 237 | "id": "cc296f7d", 238 | "metadata": { 239 | "slideshow": { 240 | "slide_type": "slide" 241 | } 242 | }, 243 | "source": [ 244 | "# Distributed RDataFrame\n", 245 | "\n", 246 | "An `RDataFrame` analysis written in Python can be executed both *locally* - possibly in parallel on the cores of the machine - and *distributedly* by offloading computations to external resources, which include:\n", 247 | "\n", 248 | "- [Spark](https://spark.apache.org/) and \n", 249 | "- [Dask](https://dask.org/) clusters. 
\n", 250 | "\n", 251 | "- This feature is enabled by the architecture depicted below.\n", 252 | "\n", 253 | "- It shows that RDataFrame computation graphs can be mapped to different kinds of resources via backends.\n", 254 | "\n", 255 | "- In this notebook we will exercise the Dask backend, which divides an `RDataFrame` input dataset into logical ranges and submits computations for each of those ranges to Dask resources.\n", 256 | "\n", 257 | "![Distributed RDataFrame architecture](../../images/DistRDF_architecture.png)" 258 | ] 259 | }, 260 | { 261 | "attachments": {}, 262 | "cell_type": "markdown", 263 | "id": "c2731e8a", 264 | "metadata": { 265 | "slideshow": { 266 | "slide_type": "slide" 267 | } 268 | }, 269 | "source": [ 270 | "## Create a Dask client\n", 271 | "\n", 272 | "- In order to work with a Dask cluster we need a `Client` object.\n", 273 | "- It represents the connection to that cluster and allows configuring execution-related parameters (e.g. number of cores, memory). \n", 274 | "- The client object is just the intermediary between our client session and the cluster resources.\n", 275 | "- Dask supports many different resource managers.\n", 276 | "- We will follow the [Dask documentation](https://distributed.dask.org/en/stable/client.html) regarding the creation of a `Client`."
277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "id": "a4e156e5", 283 | "metadata": { 284 | "slideshow": { 285 | "slide_type": "fragment" 286 | } 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "from dask.distributed import Client, LocalCluster\n", 291 | "cluster = LocalCluster(n_workers=2, threads_per_worker=1, processes=True, memory_limit=\"2GiB\")\n", 292 | "client = Client(cluster)" 293 | ] 294 | }, 295 | { 296 | "attachments": {}, 297 | "cell_type": "markdown", 298 | "id": "601ffed1", 299 | "metadata": { 300 | "slideshow": { 301 | "slide_type": "slide" 302 | } 303 | }, 304 | "source": [ 305 | "## Create a ROOT dataframe\n", 306 | "\n", 307 | "We now create an RDataFrame based on the same dataset seen in the exercise [rdataframe-dimuon](../../exercises/extra/04_RDataFrame/rdataframe-dimuon.ipynb).\n", 308 | "\n", 309 | "A Dask `RDataFrame` receives two extra parameters: \n", 310 | "- the number of partitions to apply to the dataset (`npartitions`)\n", 311 | "- the `Client` object (`daskclient`). \n", 312 | "\n", 313 | "Besides this detail, a Dask `RDataFrame` is not different from a local `RDataFrame`: the analysis presented in this notebook would not change if we wanted to execute it locally."
314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "405cf12f", 320 | "metadata": { 321 | "slideshow": { 322 | "slide_type": "fragment" 323 | } 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "# Use a Dask RDataFrame\n", 328 | "RDataFrame = ROOT.RDF.Experimental.Distributed.Dask.RDataFrame\n", 329 | "\n", 330 | "df = RDataFrame(\"h42\",\n", 331 | " \"https://root.cern/files/h1big.root\",\n", 332 | " npartitions=4,\n", 333 | " daskclient=client)" 334 | ] 335 | }, 336 | { 337 | "attachments": {}, 338 | "cell_type": "markdown", 339 | "id": "563a28e4", 340 | "metadata": { 341 | "slideshow": { 342 | "slide_type": "slide" 343 | } 344 | }, 345 | "source": [ 346 | "## Run your analysis unchanged\n", 347 | "\n", 348 | "- From now on, the rest of your application can be written **exactly** as we have seen with local RDataFrame. \n", 349 | "\n", 350 | "- The goal of the distributed RDataFrame module is to support all the traditional RDataFrame operations (those that make sense in a distributed context at least). 
\n", 351 | "\n", 352 | "- Currently only a subset of those is available and can be found in the corresponding [section of the documentation](https://root.cern/doc/master/classROOT_1_1RDataFrame.html#distrdf)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "id": "43562f8d", 359 | "metadata": { 360 | "slideshow": { 361 | "slide_type": "subslide" 362 | } 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "%%time\n", 367 | "df1 = df.Filter(\"nevent > 1\")\n", 368 | "df2 = df1.Define(\"mpt\",\"sqrt(xpt*xpt + ypt*ypt)\")\n", 369 | "c = df.Count()\n", 370 | "m = df2.Mean(\"mpt\")\n", 371 | "print(f\"Number of events after processing: {c.GetValue()}\")\n", 372 | "print(f\"Mean of column 'mpt': {m.GetValue()}\")" 373 | ] 374 | } 375 | ], 376 | "metadata": { 377 | "celltoolbar": "Slideshow", 378 | "kernelspec": { 379 | "display_name": "Python 3 (ipykernel)", 380 | "language": "python", 381 | "name": "python3" 382 | }, 383 | "language_info": { 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "file_extension": ".py", 389 | "mimetype": "text/x-python", 390 | "name": "python", 391 | "nbconvert_exporter": "python", 392 | "pygments_lexer": "ipython3", 393 | "version": "3.11.3" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 5 398 | } 399 | -------------------------------------------------------------------------------- /course/notebooks/extra/extra-00-setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Working with ROOT during this course" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "**NO NEED to install anything!** \n", 23 | "\n", 24 | "Two options to follow the course:\n", 25 | " 
* [SWAN](https://swan.cern.ch): CERN's notebook service, **requires CERN account**\n", 26 | " * [Binder](https://mybinder.org/): public notebook service, no account needed" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "slideshow": { 33 | "slide_type": "slide" 34 | } 35 | }, 36 | "source": [ 37 | "## SWAN (Service for Web-based ANalysis)\n", 38 | "https://swan.cern.ch" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "slideshow": { 45 | "slide_type": "subslide" 46 | } 47 | }, 48 | "source": [ 49 | "### What is SWAN?\n", 50 | "\n", 51 | "**SWAN (Service for Web-based ANalysis)** is a CERN service that allows users to perform interactive data analysis in the cloud, following a \"software as a service\" model. It is built upon the widely-used Jupyter notebooks, which allows users to write - and run - their data analyses using only a web browser.\n", 52 | "\n", 53 | "By connecting to SWAN, users have immediate access to the CERN storage, software and computing resources they need to do their analyses!" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "slideshow": { 60 | "slide_type": "subslide" 61 | } 62 | }, 63 | "source": [ 64 | "
" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "slideshow": { 71 | "slide_type": "subslide" 72 | } 73 | }, 74 | "source": [ 75 | "
" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "slideshow": { 82 | "slide_type": "subslide" 83 | } 84 | }, 85 | "source": [ 86 | "
" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "slideshow": { 93 | "slide_type": "subslide" 94 | } 95 | }, 96 | "source": [ 97 | "
" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": { 103 | "slideshow": { 104 | "slide_type": "subslide" 105 | } 106 | }, 107 | "source": [ 108 | "
" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": { 114 | "slideshow": { 115 | "slide_type": "subslide" 116 | } 117 | }, 118 | "source": [ 119 | "Useful links:\n", 120 | " \n", 121 | "- SWAN help (also available in the service itself): https://github.com/swan-cern/help\n", 122 | "- SWAN galleries (notebook examples): https://swan-gallery.web.cern.ch\n", 123 | "- SWAN community forum: https://swan-community.web.cern.ch" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "slideshow": { 130 | "slide_type": "subslide" 131 | } 132 | }, 133 | "source": [ 134 | "### Working with SWAN for this course\n", 135 | "\n", 136 | "The material for this course is available at [this repository](https://github.com/root-project/software-carpentry).\n", 137 | "\n", 138 | "In order to open the course material with SWAN:\n", 139 | "1. Make sure you log in to CERNBox at least once: https://cernbox.cern.ch\n", 140 | "2. Open the Github repository where the material is: https://github.com/root-project/software-carpentry\n", 141 | "3. Click on the **\"Open in SWAN\"** badge\n", 142 | "4. Start your SWAN session (the default values of the form are ok)\n", 143 | "\n", 144 | "Once the steps above are completed, SWAN will download the course material into a SWAN project in your CERNBox space and open that project. SWAN already provides a ROOT installation that you can use, so no need to worry about that!" 
145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "slideshow": { 151 | "slide_type": "slide" 152 | } 153 | }, 154 | "source": [ 155 | "## Binder\n", 156 | "https://mybinder.org" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": { 162 | "slideshow": { 163 | "slide_type": "subslide" 164 | } 165 | }, 166 | "source": [ 167 | "### What is Binder?\n", 168 | "\n", 169 | "Binder is a public service that allows you to edit and run Jupyter notebooks.\n", 170 | "\n", 171 | "It does not require any credentials. Temporary user sessions are created on demand." 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": { 177 | "slideshow": { 178 | "slide_type": "subslide" 179 | } 180 | }, 181 | "source": [ 182 | "### A Binder repository\n", 183 | "\n", 184 | "In order to open your notebooks with Binder, you need to add them to a public code repository (e.g. on GitHub, GitLab or Bitbucket). Such a repository must contain at least two things:\n", 185 | "\n", 186 | "* **Code** or content that you would like people to run, e.g. notebooks.\n", 187 | "* **Configuration files**: used by Binder to build the environment needed to run your code." 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "slideshow": { 194 | "slide_type": "subslide" 195 | } 196 | }, 197 | "source": [ 198 | "
" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": { 204 | "slideshow": { 205 | "slide_type": "subslide" 206 | } 207 | }, 208 | "source": [ 209 | "### Working with Binder for this course\n", 210 | "\n", 211 | "The material for this course is available at [this repository](https://github.com/root-project/software-carpentry).\n", 212 | "\n", 213 | "In order to open the course material with Binder:\n", 214 | "1. Open the GitHub repository where the material is: https://github.com/root-project/software-carpentry\n", 215 | "2. Click on the **\"launch binder\"** badge\n", 216 | "\n", 217 | "Once the steps above are completed, Binder will create a temporary Jupyter session for you and open the contents of the repository. Note that the storage of this temporary session is **volatile**: any modifications made to the notebooks will disappear after the session ends.\n", 218 | "\n", 219 | "The repository with the course material already contains an `environment.yml` configuration file for Binder to install the ROOT conda distribution, so no need to worry about that!" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": { 225 | "slideshow": { 226 | "slide_type": "slide" 227 | } 228 | }, 229 | "source": [ 230 | "## Installing ROOT locally\n", 231 | "\n", 232 | "For people interested in having a local installation of ROOT to use **after this course**, please visit the following link\n", 233 | "\n", 234 | "https://root.cern/install\n", 235 | "\n", 236 | "for information about the different available installation options."
237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "celltoolbar": "Slideshow", 242 | "kernelspec": { 243 | "display_name": "Python 3 (ipykernel)", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.9.6" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /course/notebooks/extra/extra-01-jupyter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# The Jupyter Notebook\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "### Interactive computing\n", 23 | "\n", 24 | "The Jupyter Notebook is an **interactive computing environment** that enables users to author notebook documents that include: \n", 25 | "- Live code\n", 26 | "- Interactive widgets\n", 27 | "- Plots\n", 28 | "- Narrative text\n", 29 | "- Equations\n", 30 | "- Images\n", 31 | "- Video\n", 32 | "\n", 33 | "These documents provide a **complete and self-contained record of a computation** that can be converted to various formats and shared with others using email, [Dropbox](https://www.dropbox.com/), version control systems (like git/[GitHub](https://github.com)) or [nbviewer.jupyter.org](https://nbviewer.jupyter.org)." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "### Multiple languages\n", 45 | "\n", 46 | "Through Jupyter's kernel and messaging architecture, the Notebook allows code to be run in a range of different programming languages. For each notebook document that a user opens, the web application starts a kernel that runs the code for that notebook. There are kernels available in the following languages:\n", 47 | "* Python (https://github.com/ipython/ipython)\n", 48 | "* C++ (https://github.com/root-project/root/)\n", 49 | "* Julia (https://github.com/JuliaLang/IJulia.jl)\n", 50 | "* R (https://github.com/IRkernel/IRkernel)\n", 51 | "* Ruby (https://github.com/minrk/iruby)\n", 52 | "* Scala (https://github.com/Bridgewater/scala-notebook)\n", 53 | "* node.js (https://gist.github.com/Carreau/4279371)\n", 54 | "* Go (https://github.com/takluyver/igo)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": { 60 | "slideshow": { 61 | "slide_type": "slide" 62 | } 63 | }, 64 | "source": [ 65 | "### The notebook document\n", 66 | "\n", 67 | "Notebooks consist of a **linear sequence of cells**. There are three basic cell types:\n", 68 | "\n", 69 | "* **Code cells:** Input and output of live code that is run in the kernel\n", 70 | "* **Markdown cells:** Narrative text with embedded LaTeX equations\n", 71 | "* **Raw cells:** Unformatted text that is included, without modification, when notebooks are converted to different formats using nbconvert\n", 72 | "\n", 73 | "Internally, notebook documents are [JSON](https://en.wikipedia.org/wiki/JSON) **data** with **binary values** [base64](https://en.wikipedia.org/wiki/Base64) encoded. This allows them to be **read and manipulated programmatically** by any programming language. Because JSON is a text format, notebook documents are version control friendly.\n", 74 | "\n", 75 | "Tip: **shift + enter** to run a cell!"
76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "slideshow": { 83 | "slide_type": "subslide" 84 | } 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# Write code and execute it, produce text output\n", 89 | "print(\"This is code!\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "slideshow": { 97 | "slide_type": "subslide" 98 | } 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "# Display an image\n", 103 | "from IPython.display import Image\n", 104 | "Image(filename='../../images/cern-logo.png') " 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "slideshow": { 112 | "slide_type": "subslide" 113 | } 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "import ipywidgets as widgets\n", 118 | "widgets.Dropdown(\n", 119 | " options=['1', '2', '3'],\n", 120 | " value='2',\n", 121 | " description='Number:',\n", 122 | " disabled=False,\n", 123 | ")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "slideshow": { 130 | "slide_type": "subslide" 131 | } 132 | }, 133 | "source": [ 134 | "Write _formatted_ **markdown text**" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "slideshow": { 141 | "slide_type": "slide" 142 | } 143 | }, 144 | "source": [ 145 | "### Use cases\n", 146 | "\n", 147 | "* Programming and Computer Science\n", 148 | "* Statistics, Machine Learning and Data Science\n", 149 | "* Mathematics, Physics, Chemistry, Biology\n", 150 | "* Earth Science and Geo-Spatial data\n", 151 | "* Linguistics and Text Mining\n", 152 | "* Signal Processing, Engineering Education\n", 153 | "* and even Psychology and Neuroscience" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": { 159 | "slideshow": { 160 | "slide_type": "slide" 161 | } 162 | }, 163 | "source": [ 164 | "For more information, please visit: 
https://jupyter-notebook.readthedocs.io\n", 165 | "\n", 166 | "Notebook examples available at: https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "celltoolbar": "Slideshow", 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.8.6" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /course/notebooks/extra/extra-02-root-python-cpp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Using ROOT to bind Python and C++" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "## What is PyROOT?\n", 23 | "\n", 24 | "* **PyROOT** is the name of the Python bindings offered by ROOT\n", 25 | "* All the ROOT C++ functions and classes are accessible from Python via PyROOT\n", 26 | " * Python façade, C++ performance\n", 27 | "* But PyROOT is not just for ROOT!\n", 28 | " * It can also call into user-defined C++ code" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "slideshow": { 35 | "slide_type": "slide" 36 | } 37 | }, 38 | "source": [ 39 | "## How does PyROOT work?\n", 40 | "\n", 41 | "* PyROOT is a special type of bindings, since it's **automatic** and **dynamic**\n", 42 | " * No static wrapper generation\n", 43 | " * Dynamic 
Python proxies are created for C++ entities\n", 44 | " * Lazy class/variable lookup\n", 45 | "* Powered by [cppyy](https://cppyy.readthedocs.io/), the ROOT type system and [Cling](https://root.cern/cling/)\n", 46 | " * Reflection information\n", 47 | " * JIT C++ compilation and execution\n", 48 | "* And on top of the automatic bindings: **pythonizations**\n", 49 | " * To make the use of C++ from Python simpler, more pythonic" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": { 55 | "slideshow": { 56 | "slide_type": "slide" 57 | } 58 | }, 59 | "source": [ 60 | "## Using ROOT from Python\n", 61 | "\n", 62 | "To start working with PyROOT, we need to import the ROOT module." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "slideshow": { 70 | "slide_type": "fragment" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "import ROOT" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "slideshow": { 82 | "slide_type": "fragment" 83 | } 84 | }, 85 | "source": [ 86 | "The ROOT Python module is the entry point for all the ROOT C++ functionality.\n", 87 | "\n", 88 | "For example, we can create a histogram with ROOT using the `TH1D` C++ class from Python:" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "slideshow": { 96 | "slide_type": "fragment" 97 | } 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "h = ROOT.TH1D(\"my_histo\", \"Example histogram\", 100, -4, 4)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": { 107 | "slideshow": { 108 | "slide_type": "slide" 109 | } 110 | }, 111 | "source": [ 112 | "## Calling user-defined C++ code via PyROOT\n", 113 | "\n", 114 | "We've seen how PyROOT allows us to access all the functions and classes that the ROOT C++ libraries define.\n", 115 | "\n", 116 | "In addition, it is possible to make PyROOT call into user-defined C++. 
For example, it is possible to declare a C++ function, as it is done below by passing its code as a string argument of the `ProcessLine` function:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "slideshow": { 124 | "slide_type": "fragment" 125 | } 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "ROOT.gInterpreter.ProcessLine(\"\"\"\n", 130 | "double add(double a, double b) {\n", 131 | " return a + b;\n", 132 | "}\n", 133 | "\"\"\")" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "slideshow": { 140 | "slide_type": "fragment" 141 | } 142 | }, 143 | "source": [ 144 | "and use it right away from Python:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "slideshow": { 152 | "slide_type": "fragment" 153 | } 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "ROOT.add(3.14, 100)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": { 163 | "slideshow": { 164 | "slide_type": "subslide" 165 | } 166 | }, 167 | "source": [ 168 | "### What about code in C++ libraries?\n", 169 | "\n", 170 | "In the example we just saw, the user-defined C++ code is contained in strings in our program, but PyROOT can also load and call into C++ libraries. This enables you to write high-performance C++, compile it and use it from Python.\n", 171 | "\n", 172 | "More information can be found [here](https://root.cern/manual/python/#loading-user-libraries-and-just-in-time-compilation-jitting)." 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "slideshow": { 179 | "slide_type": "slide" 180 | } 181 | }, 182 | "source": [ 183 | "## Type conversions\n", 184 | "\n", 185 | "When calling C++ from Python via PyROOT, there needs to be a conversion between the Python arguments we pass and the C++ arguments that the C++ side expects. 
PyROOT takes care of such conversion automatically, for example from Python integer to C++ integer:" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "slideshow": { 193 | "slide_type": "fragment" 194 | } 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "ROOT.gInterpreter.ProcessLine(\"void print_integer(int i) { std::cout << i << std::endl; }\")\n", 199 | "\n", 200 | "ROOT.print_integer(7)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": { 206 | "slideshow": { 207 | "slide_type": "fragment" 208 | } 209 | }, 210 | "source": [ 211 | "Of course not every conversion is allowed!" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": { 218 | "slideshow": { 219 | "slide_type": "fragment" 220 | } 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "ROOT.print_integer([]) # fails with TypeError" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "slideshow": { 231 | "slide_type": "slide" 232 | } 233 | }, 234 | "source": [ 235 | "An example of a useful allowed conversion is Python list to `std::vector`:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "slideshow": { 243 | "slide_type": "fragment" 244 | } 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "ROOT.gInterpreter.ProcessLine(\"\"\"\n", 249 | "void print_vector(const std::vector<std::string> &v) {\n", 250 | " for (auto &s : v) {\n", 251 | " std::cout << s << std::endl;\n", 252 | " }\n", 253 | "}\n", 254 | "\"\"\")\n", 255 | "\n", 256 | "ROOT.print_vector(['Two', 'Words'])" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": { 262 | "slideshow": { 263 | "slide_type": "slide" 264 | } 265 | }, 266 | "source": [ 267 | "## A final note on performance\n", 268 | "\n", 269 | "Being able to call into C++ from Python does not guarantee that the performance of your Python script will 
always be the best, no matter what code you write!\n", 270 | "\n", 271 | "In general, any heavy computation should be pushed to C++, e.g. encapsulating it in some C++ function that you call from Python or relying on libraries with fast C/C++ implementations (e.g. ROOT, NumPy).\n", 272 | "\n", 273 | "In the context of high-energy physics, iterating over the collision events in a dataset is a common operation. Such iteration in Python can be slow for big datasets and should only be done during short exploratory work. Later in this course we will see how the event loop can be efficiently executed in C++, even from a Python script, with the help of ROOT's [RDataFrame](https://root.cern/doc/master/classROOT_1_1RDataFrame.html).\n", 274 | "\n", 275 | "```python\n", 276 | "# This can be slow!\n", 277 | "for event in dataset:\n", 278 | " h.Fill(event.field)\n", 279 | "```" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "celltoolbar": "Slideshow", 285 | "kernelspec": { 286 | "display_name": "Python 3", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.8.6" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 2 305 | } 306 | -------------------------------------------------------------------------------- /course/notebooks/extra/extra-03-root-in-jupyter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ROOT in Jupyter" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "slideshow": { 14 | "slide_type": "slide" 15 | } 16 | }, 17 | "source": [ 18 | "ROOT can be used in Jupyter notebooks, both in Python and C++. 
In this course we will focus only on Python, but for people interested in ROOT C++ notebooks some examples can be found [here](https://swan-gallery.web.cern.ch/root_primer/).\n", 19 | "\n", 20 | "There are some specificities and extra features available when running ROOT from a notebook, and that's what will be covered in this section!\n", 21 | "\n", 22 | "In Python, everything starts with importing ROOT as we already saw in [extra-02-root-python-cpp.ipynb](extra-02-root-python-cpp.ipynb)." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "fragment" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import ROOT" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "## Quickly checking the docs\n", 47 | "\n", 48 | "To take a quick peek at a function signature, we can use the notebook's ? feature:" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "slideshow": { 56 | "slide_type": "fragment" 57 | } 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "?ROOT.TH1D.FillRandom" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": { 67 | "slideshow": { 68 | "slide_type": "slide" 69 | } 70 | }, 71 | "source": [ 72 | "## ROOT Graphics in a notebook\n", 73 | "\n", 74 | "There are two modes in which we can visualize ROOT plots in a notebook:\n", 75 | "* Static images (default)\n", 76 | "* Interactive JavaScript displays\n", 77 | "\n", 78 | "To see an example, let's first create a histogram and fill it with random numbers following a Gaussian distribution:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "slideshow": { 86 | "slide_type": "fragment" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "h = ROOT.TH1D(\"my_histo\", \"Example histogram\", 100, -4, 4)\n", 92 | 
"h.FillRandom(\"gaus\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_type": "subslide" 100 | } 101 | }, 102 | "source": [ 103 | "Now let's plot the histogram. For that purpose, we first need to create a canvas, then draw the histogram and finally draw the canvas. See how the output is a static image." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "slideshow": { 111 | "slide_type": "fragment" 112 | } 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "c = ROOT.TCanvas()\n", 117 | "h.Draw()\n", 118 | "c.Draw() # don't forget to draw the canvas too!" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "slideshow": { 125 | "slide_type": "subslide" 126 | } 127 | }, 128 | "source": [ 129 | "### Interactive graphics\n", 130 | "\n", 131 | "As mentioned before, we can also display interactive ROOT graphics in a notebook. To do that, we can use the `%jsroot` magic and turn on the JavaScript mode." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "slideshow": { 139 | "slide_type": "fragment" 140 | } 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "%jsroot on" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "slideshow": { 151 | "slide_type": "subslide" 152 | } 153 | }, 154 | "source": [ 155 | "If we draw again the same canvas after activating JS graphics, this time the histogram plot is interactive! Try to hover over it with the mouse and zoom in and out." 
156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "slideshow": { 163 | "slide_type": "fragment" 164 | } 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "c.Draw()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "slideshow": { 175 | "slide_type": "slide" 176 | } 177 | }, 178 | "source": [ 179 | "## Combining Python and C++ in the same notebook\n", 180 | "\n", 181 | "As we saw in [extra-02-root-python-cpp.ipynb](extra-02-root-python-cpp.ipynb), PyROOT lets us define C++ and call it from Python right away." 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "slideshow": { 189 | "slide_type": "fragment" 190 | } 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "ROOT.gInterpreter.ProcessLine(\"void print_integer(int i) { std::cout << i << std::endl; }\")\n", 195 | "\n", 196 | "ROOT.print_integer(7)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "slideshow": { 203 | "slide_type": "subslide" 204 | } 205 | }, 206 | "source": [ 207 | "In a notebook, we can use the `%%cpp` magic to do the same. If `%%cpp` is present in a cell, its whole content is interpreted and executed as C++, and it has C++ syntax coloring!" 
208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "slideshow": { 215 | "slide_type": "fragment" 216 | } 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "%%cpp\n", 221 | "void print_integer_2(int i) {\n", 222 | " std::cout << i << std::endl;\n", 223 | "}" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "slideshow": { 230 | "slide_type": "fragment" 231 | } 232 | }, 233 | "source": [ 234 | "The function we just defined in the previous (C++) cell can be now invoked from the next (Python) cell:" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": { 241 | "slideshow": { 242 | "slide_type": "fragment" 243 | } 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "ROOT.print_integer_2(7)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": { 253 | "slideshow": { 254 | "slide_type": "subslide" 255 | } 256 | }, 257 | "source": [ 258 | "Note that, besides defining new C++ entities, we can also run C++ statements." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "slideshow": { 266 | "slide_type": "fragment" 267 | } 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "%%cpp\n", 272 | "std::cout << \"Hello from C++!\" << std::endl;\n", 273 | "\n", 274 | "print_integer_2(7);" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": { 280 | "slideshow": { 281 | "slide_type": "subslide" 282 | } 283 | }, 284 | "source": [ 285 | "### Redefining C++ entities\n", 286 | "\n", 287 | "In C++, redefinition of entities such as variables is not allowed. However, ROOT provides a cool redefinition feature that is especially useful in interactive interfaces like notebooks. This feature works for types, functions and variables defined in the global namespace.\n", 288 | "\n", 289 | "You can check it by running the next cells, even multiple times!" 
290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "slideshow": { 297 | "slide_type": "fragment" 298 | } 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "%%cpp\n", 303 | "int foo = 0;" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "slideshow": { 311 | "slide_type": "fragment" 312 | } 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "%%cpp\n", 317 | "class foo {};" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "slideshow": { 325 | "slide_type": "fragment" 326 | } 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "%%cpp\n", 331 | "void foo() {}" 332 | ] 333 | } 334 | ], 335 | "metadata": { 336 | "celltoolbar": "Slideshow", 337 | "kernelspec": { 338 | "display_name": "Python 3 (ipykernel)", 339 | "language": "python", 340 | "name": "python3" 341 | }, 342 | "language_info": { 343 | "codemirror_mode": { 344 | "name": "ipython", 345 | "version": 3 346 | }, 347 | "file_extension": ".py", 348 | "mimetype": "text/x-python", 349 | "name": "python", 350 | "nbconvert_exporter": "python", 351 | "pygments_lexer": "ipython3", 352 | "version": "3.9.6" 353 | } 354 | }, 355 | "nbformat": 4, 356 | "nbformat_minor": 2 357 | } 358 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - root 5 | - pyspark 6 | - dask 7 | - distributed 8 | --------------------------------------------------------------------------------