├── .DS_Store ├── .all-contributorsrc ├── .zenodo.json ├── CITATION.cff ├── LICENSE.md ├── README.md ├── ethos-of-open ├── CONTRIBUTORS.md ├── README.md ├── definitions.md ├── files │ └── placeholder.md ├── img │ ├── image2_5_schools_of_open_science.png │ └── image_3_lesson_5_open_science_skills.png ├── lesson1-intro-to-open-science.md ├── lesson2-benefits-challenges-of-open.md ├── lesson3-open-stakeholders.md ├── lesson4-impact-of-open.md └── lesson5-how.md ├── open-data ├── .DS_Store ├── Appendix.md ├── Lesson1-WhatIsOpenData.md ├── Lesson2-Benefits.md ├── Lesson3-Responsible.md ├── Lesson4-CARE&FAIR.md ├── Lesson5-Planning.md ├── Lesson6-Sharing&Reuse.md ├── README.md ├── contributors.md ├── definitions.md ├── docs │ └── placeholder.md ├── img │ ├── OpenSciency_Twitter_banner.png │ └── OpenSciency_logo.png ├── learning-objectives.md ├── references.bib └── references.md ├── open-results ├── CONTRIBUTORS.md ├── README.md ├── figures │ ├── README.md │ ├── acknowledgement.jpg │ ├── authorship-guide.jpg │ ├── benefits.png │ ├── research-cycle.jpg │ └── research-object.jpg ├── files │ └── placeholder.md ├── lesson-1-research-process-and-results.md ├── lesson-2-results-and-open-science.md ├── lesson-3-apply-open-results.md ├── lesson-4-opportunities-and-credits.md └── lesson1-brief-heading.md ├── open-software ├── CONTRIBUTORS.md ├── README.md ├── files │ └── placeholder.md ├── lesson0-preamble.md ├── lesson1-introduction.md ├── lesson2-pros-cons.md ├── lesson3-licensing.md ├── lesson4-code-management.md ├── lesson5-vesion-control.md ├── lesson6-contribution.md └── notes.md ├── open-tools-resources ├── CONTRIBUTORS.md ├── README.md ├── files │ └── placeholder.md ├── lesson1-intro-open-science-tools.md ├── lesson2-tools-across-research-lifecycle.md ├── lesson3-tools-for-reproducibility.md ├── lesson4-practicing-team-open-science.md └── lesson5-open-science-communities.md └── release-workflow.md /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/.DS_Store -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "commitConvention": "angular", 8 | "contributors": [ 9 | { 10 | "login": "yochannah", 11 | "name": "Yo Yehudi", 12 | "avatar_url": "https://avatars.githubusercontent.com/u/9271438?v=4", 13 | "profile": "http://yo-yehudi.com", 14 | "contributions": [ 15 | "mentoring" 16 | ] 17 | }, 18 | { 19 | "login": "natashabatalha", 20 | "name": "Natasha Batalha", 21 | "avatar_url": "https://avatars.githubusercontent.com/u/6554465?v=4", 22 | "profile": "http://natashabatalha.github.io", 23 | "contributions": [ 24 | "mentoring" 25 | ] 26 | }, 27 | { 28 | "login": "shilaan", 29 | "name": "Shilaan Alzahawi", 30 | "avatar_url": "https://avatars.githubusercontent.com/u/61210785?v=4", 31 | "profile": "https://shilaan.rbind.io", 32 | "contributions": [ 33 | "mentoring" 34 | ] 35 | }, 36 | { 37 | "login": "selgebali", 38 | "name": "Sara", 39 | "avatar_url": "https://avatars.githubusercontent.com/u/23166543?v=4", 40 | "profile": "https://github.com/selgebali", 41 | "contributions": [ 42 | "mentoring" 43 | ] 44 | }, 45 | { 46 | "login": "camriddell", 47 | "name": "Cameron", 48 | "avatar_url": "https://avatars.githubusercontent.com/u/96146940?v=4", 49 | "profile": "https://github.com/camriddell", 50 | "contributions": [ 51 | "mentoring" 52 | ] 53 | }, 54 | { 55 | "login": "dutc", 56 | "name": "James Powell", 57 | "avatar_url": "https://avatars.githubusercontent.com/u/3922744?v=4", 58 | "profile": "http://talks.dutc.io", 59 | "contributions": [ 60 | "mentoring" 61 | ] 62 | }, 63 | { 64 | "login": "dasaderi", 65 | "name": "Daniela 
Saderi", 66 | "avatar_url": "https://avatars.githubusercontent.com/u/13750121?v=4", 67 | "profile": "https://github.com/dasaderi", 68 | "contributions": [ 69 | "content" 70 | ] 71 | }, 72 | { 73 | "login": "smhall97", 74 | "name": "smhall97", 75 | "avatar_url": "https://avatars.githubusercontent.com/u/43542098?v=4", 76 | "profile": "https://github.com/smhall97", 77 | "contributions": [ 78 | "content" 79 | ] 80 | }, 81 | { 82 | "login": "Jannatul-Ferdush", 83 | "name": "Jannatul Ferdush", 84 | "avatar_url": "https://avatars.githubusercontent.com/u/38519049?v=4", 85 | "profile": "https://github.com/Jannatul-Ferdush", 86 | "contributions": [ 87 | "content" 88 | ] 89 | }, 90 | { 91 | "login": "flavioazevedo", 92 | "name": "Flavio Azevedo", 93 | "avatar_url": "https://avatars.githubusercontent.com/u/18330492?v=4", 94 | "profile": "http://flavioazevedo.com/publications", 95 | "contributions": [ 96 | "content" 97 | ] 98 | }, 99 | { 100 | "login": "libcce", 101 | "name": "Chris Erdmann", 102 | "avatar_url": "https://avatars.githubusercontent.com/u/3680365?v=4", 103 | "profile": "https://www.michaeljfox.org/", 104 | "contributions": [ 105 | "mentoring" 106 | ] 107 | }, 108 | { 109 | "login": "geo-yrao", 110 | "name": "Yuhan (Douglas) Rao", 111 | "avatar_url": "https://avatars.githubusercontent.com/u/38257268?v=4", 112 | "profile": "https://github.com/geo-yrao", 113 | "contributions": [ 114 | "content" 115 | ] 116 | }, 117 | { 118 | "login": "BatoolMM", 119 | "name": "Batool Almarzouq", 120 | "avatar_url": "https://avatars.githubusercontent.com/u/53487593?v=4", 121 | "profile": "https://batool-almarzouq.netlify.app/", 122 | "contributions": [ 123 | "content" 124 | ] 125 | }, 126 | { 127 | "login": "EstherPlomp", 128 | "name": "Esther Plomp", 129 | "avatar_url": "https://avatars.githubusercontent.com/u/46314469?v=4", 130 | "profile": "https://github.com/EstherPlomp", 131 | "contributions": [ 132 | "content" 133 | ] 134 | }, 135 | { 136 | "login": "TomoCoral", 137 | "name": 
"TomoCoral", 138 | "avatar_url": "https://avatars.githubusercontent.com/u/98495490?v=4", 139 | "profile": "https://github.com/TomoCoral", 140 | "contributions": [ 141 | "content" 142 | ] 143 | }, 144 | { 145 | "login": "melibleq", 146 | "name": "Melissa Black", 147 | "avatar_url": "https://avatars.githubusercontent.com/u/16723182?v=4", 148 | "profile": "https://github.com/melibleq", 149 | "contributions": [ 150 | "content" 151 | ] 152 | }, 153 | { 154 | "login": "malvikasharan", 155 | "name": "Malvika Sharan", 156 | "avatar_url": "https://avatars.githubusercontent.com/u/5370471?v=4", 157 | "profile": "http://malvikasharan.github.io/", 158 | "contributions": [ 159 | "content" 160 | ] 161 | }, 162 | { 163 | "login": "SaranjeetKaur", 164 | "name": "Saranjeet Kaur", 165 | "avatar_url": "https://avatars.githubusercontent.com/u/28556616?v=4", 166 | "profile": "https://saranjeetkaur.github.io/About-Me/", 167 | "contributions": [ 168 | "content" 169 | ] 170 | }, 171 | { 172 | "login": "michelusp", 173 | "name": "Michel Lacerda", 174 | "avatar_url": "https://avatars.githubusercontent.com/u/29486679?v=4", 175 | "profile": "https://github.com/michelusp", 176 | "contributions": [ 177 | "content" 178 | ] 179 | }, 180 | { 181 | "login": "Ismael-KG", 182 | "name": "Ismael-KG", 183 | "avatar_url": "https://avatars.githubusercontent.com/u/64027166?v=4", 184 | "profile": "https://www.tiki-toki.com/timeline/entry/1753034/A-History-of-Research-Ethics/", 185 | "contributions": [ 186 | "content" 187 | ] 188 | }, 189 | { 190 | "login": "andreamedinasmith", 191 | "name": "andreamedinasmith", 192 | "avatar_url": "https://avatars.githubusercontent.com/u/6033782?v=4", 193 | "profile": "https://github.com/andreamedinasmith", 194 | "contributions": [ 195 | "content" 196 | ] 197 | }, 198 | { 199 | "login": "aosman12", 200 | "name": "aosman12", 201 | "avatar_url": "https://avatars.githubusercontent.com/u/68354949?v=4", 202 | "profile": "https://github.com/aosman12", 203 | "contributions": [ 204 
| "content" 205 | ] 206 | }, 207 | { 208 | "login": "eliocamp", 209 | "name": "Elio Campitelli", 210 | "avatar_url": "https://avatars.githubusercontent.com/u/8617595?v=4", 211 | "profile": "https://github.com/eliocamp", 212 | "contributions": [ 213 | "content" 214 | ] 215 | }, 216 | { 217 | "login": "smklusza", 218 | "name": "Stephen Klusza", 219 | "avatar_url": "https://avatars.githubusercontent.com/u/28989267?v=4", 220 | "profile": "https://github.com/smklusza", 221 | "contributions": [ 222 | "content" 223 | ] 224 | }, 225 | { 226 | "login": "marimeireles", 227 | "name": "Mariana Meireles", 228 | "avatar_url": "https://avatars.githubusercontent.com/u/17600982?v=4", 229 | "profile": "https://psychonautgirl.space", 230 | "contributions": [ 231 | "content" 232 | ] 233 | }, 234 | { 235 | "login": "karegapauline", 236 | "name": "Pauline Karega", 237 | "avatar_url": "https://avatars.githubusercontent.com/u/27417671?v=4", 238 | "profile": "https://github.com/karegapauline", 239 | "contributions": [ 240 | "content" 241 | ] 242 | }, 243 | { 244 | "login": "annefou", 245 | "name": "Anne Fouilloux", 246 | "avatar_url": "https://avatars.githubusercontent.com/u/8168508?v=4", 247 | "profile": "http://www.mn.uio.no/geo/english/people/adm/annefou/", 248 | "contributions": [ 249 | "content" 250 | ] 251 | }, 252 | { 253 | "login": "camachoreina", 254 | "name": "Reina Camacho Toro", 255 | "avatar_url": "https://avatars.githubusercontent.com/u/12454015?v=4", 256 | "profile": "https://github.com/camachoreina", 257 | "contributions": [ 258 | "content" 259 | ] 260 | }, 261 | { 262 | "login": "Sierra-MC", 263 | "name": "Sierra V. 
Kaufman", 264 | "avatar_url": "https://avatars.githubusercontent.com/u/88336748?v=4", 265 | "profile": "https://github.com/Sierra-MC", 266 | "contributions": [ 267 | "content" 268 | ] 269 | }, 270 | { 271 | "login": "shmuhammad2004", 272 | "name": "Shamsudddeen Hassan Muhammad", 273 | "avatar_url": "https://avatars.githubusercontent.com/u/38854463?v=4", 274 | "profile": "https://github.com/shmuhammad2004", 275 | "contributions": [ 276 | "content" 277 | ] 278 | }, 279 | { 280 | "login": "likeajumprope", 281 | "name": "Johanna Bayer", 282 | "avatar_url": "https://avatars.githubusercontent.com/u/23728822?v=4", 283 | "profile": "https://github.com/likeajumprope", 284 | "contributions": [ 285 | "content" 286 | ] 287 | }, 288 | { 289 | "login": "hughshanahan", 290 | "name": "Hugh Shanahan", 291 | "avatar_url": "https://avatars.githubusercontent.com/u/3338109?v=4", 292 | "profile": "http://www.shanahanlab.org", 293 | "contributions": [ 294 | "content" 295 | ] 296 | }, 297 | { 298 | "login": "MiguelSilan", 299 | "name": "MiguelSilan", 300 | "avatar_url": "https://avatars.githubusercontent.com/u/32735330?v=4", 301 | "profile": "https://github.com/MiguelSilan", 302 | "contributions": [ 303 | "content" 304 | ] 305 | }, 306 | { 307 | "login": "Elpapado", 308 | "name": "Elli Papadopoulou", 309 | "avatar_url": "https://avatars.githubusercontent.com/u/16916017?v=4", 310 | "profile": "https://github.com/Elpapado", 311 | "contributions": [ 312 | "content" 313 | ] 314 | }, 315 | { 316 | "login": "dunldj", 317 | "name": "dunldj", 318 | "avatar_url": "https://avatars.githubusercontent.com/u/30324612?v=4", 319 | "profile": "https://danieljdunleavy.netlify.app/", 320 | "contributions": [ 321 | "content" 322 | ] 323 | }, 324 | { 325 | "login": "AnaVaz-NOAA", 326 | "name": "Ana Vaz", 327 | "avatar_url": "https://avatars.githubusercontent.com/u/94488549?v=4", 328 | "profile": "https://github.com/AnaVaz-NOAA", 329 | "contributions": [ 330 | "content" 331 | ] 332 | }, 333 | { 334 | "login": 
"tyson-swetnam", 335 | "name": "Tyson L. Swetnam", 336 | "avatar_url": "https://avatars.githubusercontent.com/u/11527041?v=4", 337 | "profile": "http://tysonswetnam.com", 338 | "contributions": [ 339 | "content" 340 | ] 341 | }, 342 | { 343 | "login": "BabatundeOnabajo", 344 | "name": "Babatunde Valentine Onabajo", 345 | "avatar_url": "https://avatars.githubusercontent.com/u/36359216?v=4", 346 | "profile": "https://github.com/BabatundeOnabajo", 347 | "contributions": [ 348 | "content" 349 | ] 350 | }, 351 | { 352 | "login": "cheginit", 353 | "name": "Taher Chegini", 354 | "avatar_url": "https://avatars.githubusercontent.com/u/13016644?v=4", 355 | "profile": "https://cheginit.github.io", 356 | "contributions": [ 357 | "content" 358 | ] 359 | }, 360 | { 361 | "login": "ee2110", 362 | "name": "ee2110", 363 | "avatar_url": "https://avatars.githubusercontent.com/u/52744798?v=4", 364 | "profile": "https://github.com/ee2110", 365 | "contributions": [ 366 | "content" 367 | ] 368 | }, 369 | { 370 | "login": "rebeccaringuette", 371 | "name": "rebeccaringuette", 372 | "avatar_url": "https://avatars.githubusercontent.com/u/49281118?v=4", 373 | "profile": "https://github.com/rebeccaringuette", 374 | "contributions": [ 375 | "content" 376 | ] 377 | }, 378 | { 379 | "login": "msundukova", 380 | "name": "Mayya", 381 | "avatar_url": "https://avatars.githubusercontent.com/u/34685833?v=4", 382 | "profile": "https://github.com/msundukova", 383 | "contributions": [ 384 | "content" 385 | ] 386 | } 387 | ], 388 | "contributorsPerLine": 7, 389 | "skipCi": true, 390 | "repoType": "github", 391 | "repoHost": "https://github.com", 392 | "projectName": "sprint-content", 393 | "projectOwner": "opensciency" 394 | } 395 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "name": "OpenSciency Sprint Content" 5 | } 6 | ], 7 | "keywords": [ 
8 | "training materials", 9 | "open science", 10 | "open ethos", 11 | "open data", 12 | "open software", 13 | "open tools", 14 | "open results" 15 | ], 16 | "license": { 17 | "id": "CC-BY-4.0" 18 | }, 19 | "publication_date": "2023-02-15", 20 | "title": "Opensciency - A core open science curriculum by and for the research community.", 21 | "version": "0.0.0" 22 | } 23 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "To reference the latest version of the OpenSciency training modules, please cite it as below." 3 | authors: 4 | - name: "OpenSciency Sprint Content" 5 | title: "Opensciency - A core open science curriculum by and for the research community." 6 | version: 1.0.0 7 | identifiers: 8 | - description: The concept DOI for the collection containing all versions of the OpenSciency training modules. 9 | type: doi 10 | value: "10.5281/zenodo.7392119" 11 | license: "CC-BY-4.0" 12 | date-released: "2023-02-15" 13 | keywords: 14 | - training materials 15 | - open science 16 | - open ethos 17 | - open data 18 | - open software 19 | - open tools 20 | - open results 21 | doi: 10.5281/zenodo.7662732 22 | references: 23 | - type: edited-work 24 | authors: 25 | - family-names: Contributors 26 | given-names: OpenSciency 27 | website: https://github.com/opensciency/sprint-content 28 | - family-names: Almarzouq 29 | given-names: Batool 30 | orcid: https://orcid.org/0000-0002-3905-2751 31 | - family-names: Azevedo 32 | given-names: Flavio 33 | orcid: https://orcid.org/0000-0001-9000-8513 34 | - family-names: Batalha 35 | given-names: Natasha 36 | orcid: https://orcid.org/0000-0003-1240-6844 37 | - family-names: Bayer 38 | given-names: Johanna 39 | orcid: https://orcid.org/0000-0003-4891-6256 40 | - family-names: Bell 41 | given-names: Tomo 42 | orcid: https://orcid.org/0000-0003-4606-6307 43 | - family-names: Bhogal 44 | 
given-names: Saranjeet 45 | orcid: https://orcid.org/0000-0002-7038-1457 46 | - family-names: Black 47 | given-names: Melissa 48 | orcid: https://orcid.org/0000-0002-5406-2982 49 | - family-names: Brown 50 | given-names: Sierra 51 | orcid: https://orcid.org/0000-0001-6065-5461 52 | - family-names: Campitelli 53 | given-names: Elio 54 | orcid: https://orcid.org/0000-0002-7742-9230 55 | - family-names: Chegini 56 | given-names: Taher 57 | orcid: https://orcid.org/0000-0002-5430-6000 58 | - family-names: Dunleavy 59 | given-names: Daniel 60 | orcid: https://orcid.org/0000-0002-3597-7714 61 | - family-names: Ee 62 | given-names: Yeo Keat 63 | orcid: https://orcid.org/0000-0001-6935-3101 64 | - family-names: El-Gebali 65 | given-names: Sara 66 | orcid: https://orcid.org/0000-0003-1378-5495 67 | - family-names: Erdmann 68 | given-names: Christopher 69 | orcid: https://orcid.org/0000-0003-2554-180X 70 | - family-names: Ferdush 71 | given-names: Jannatul 72 | orcid: https://orcid.org/0000-0002-0917-4854 73 | - family-names: Fouilloux 74 | given-names: Anne 75 | orcid: https://orcid.org/0000-0002-1784-2920 76 | - family-names: Hall 77 | given-names: Siobhan Mackenzie 78 | orcid: https://orcid.org/0000-0002-1520-4220 79 | - family-names: Kherroubi Garcia 80 | given-names: Ismael 81 | orcid: https://orcid.org/0000-0002-6850-8375 82 | - family-names: Klusza 83 | given-names: Stephen 84 | orcid: https://orcid.org/0000-0003-0943-1846 85 | - family-names: Lacerda 86 | given-names: Michel 87 | orcid: https://orcid.org/0000-0002-8433-6964 88 | - family-names: Medina-Smith 89 | given-names: Andrea 90 | orcid: https://orcid.org/0000-0002-1217-701X 91 | - family-names: Meireles 92 | given-names: Mariana 93 | orcid: https://orcid.org/0000-0001-9227-9798 94 | - family-names: Muhammad 95 | given-names: Shamsuddeen 96 | orcid: https://orcid.org/0000-0001-7708-0799 97 | - family-names: Onabajo 98 | given-names: Babatunde 99 | orcid: https://orcid.org/0000-0001-6118-9255 100 | - 
family-names: Osman 101 | given-names: Amber 102 | orcid: https://orcid.org/0000-0003-1198-7843 103 | - family-names: Papadopoulou 104 | given-names: Elli 105 | orcid: https://orcid.org/0000-0002-0893-8509 106 | - family-names: Pauline 107 | given-names: Karega 108 | orcid: https://orcid.org/0000-0001-7974-048X 109 | - family-names: Plomp 110 | given-names: Esther 111 | orcid: https://orcid.org/0000-0003-3625-1357 112 | - family-names: Rao 113 | given-names: Douglas 114 | orcid: https://orcid.org/0000-0001-6850-3403 115 | - family-names: Ringuette 116 | given-names: Rebecca 117 | orcid: https://orcid.org/0000-0003-0875-2023 118 | - family-names: Saderi 119 | given-names: Daniela 120 | orcid: https://orcid.org/0000-0002-6109-0367 121 | - family-names: Shanahan 122 | given-names: Hugh 123 | orcid: https://orcid.org/0000-0003-1374-6015 124 | - family-names: Sharan 125 | given-names: Malvika 126 | orcid: https://orcid.org/0000-0001-6619-7369 127 | - family-names: Silan 128 | given-names: Miguel 129 | orcid: https://orcid.org/0000-0002-7480-3661 130 | - family-names: Sundukova 131 | given-names: Mayya 132 | orcid: https://orcid.org/0000-0003-1328-0008 133 | - family-names: Swetnam 134 | given-names: Tyson 135 | orcid: https://orcid.org/0000-0002-6639-7181 136 | - family-names: Vaz 137 | given-names: Ana 138 | orcid: https://orcid.org/0000-0003-2705-1724 139 | - family-names: Yehudi 140 | given-names: Yo 141 | orcid: https://orcid.org/0000-0003-2705-1724 142 | title: OpenSciency 143 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Creative Commons License
This work is licensed under a Creative Commons Attribution 4.0 International License. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Opensciency - A core open science curriculum by and for the research community. 2 | 3 | [![All Contributors](https://img.shields.io/badge/all_contributors-42-orange.svg?style=flat-square)](#contributors-) 4 | 5 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7662732.svg)](https://doi.org/10.5281/zenodo.7662732) 6 | Creative Commons License
This work is licensed under a Creative Commons Attribution 4.0 International License. 7 | 8 | Opensciency is a core open science curriculum, drafted to introduce those beginning their open science journey to important definitions, tools, and resources, and to provide recommended practices for participants at all levels. The material is made available under a [CC-BY 4.0 International](https://creativecommons.org/licenses/by/4.0/) license and is structured into five modules: 9 | 10 | - Ethos of Open Science 11 | - Open Tools and Resources 12 | - Open Data 13 | - Open Software 14 | - Open Results 15 | 16 | ## Citation 17 | 18 | **The latest release version (V1.0.0) can be found at this DOI 10.5281/zenodo.7392118.** 19 | 20 | To credit and cite the material, use the following citation - where possible, please include all author names as listed in the [CITATION file](https://github.com/opensciency/sprint-content/blob/main/CITATION.cff): 21 | > OpenSciency Contributors (2023, February 22). Opensciency - A core open science curriculum by and for the research community. Zenodo. https://doi.org/10.5281/zenodo.7392118 22 | 23 | Shared under the CC-BY 4.0 License, all materials remain open for anyone to build open science curriculums or reuse for other purposes. Please include all author names where possible from the GitHub README contributors table. 24 | 25 | ## Details 26 | 27 | Opensciency is a result of the work of more than 40 open science experts and practitioners from across the world and from different disciplines. The first draft of the curriculum material was developed from [June 27 - July 1, 2022](https://github.com/nasa/Transform-to-Open-Science/blob/main/docs/Area2_Capacity_Sharing/OpenCore/OpenCore_leads.md) as part of the Transform to Open Science (TOPS) [OpenCore](https://github.com/nasa/Transform-to-Open-Science/tree/main/docs/Area2_Capacity_Sharing/OpenCore) sprint. 
More information about the NASA TOPS initiative is available via their [website](https://science.nasa.gov/open-science/transform-to-open-science). After the TOPS Community Panel on [October 6, 2022](https://github.com/nasa/Transform-to-Open-Science/blob/main/docs/Area1_Engagement/Community_Panels/20221005_community_panel.md), the original contributors created the Opensciency repository to allow all contributors to further engage with the curriculum and invite review on the initial draft material from the wider research community. 28 | 29 | We encourage the wider community to reuse the material, and we are especially interested in creative approaches to displaying the material. An example we like is [Elements of AI](https://course.elementsofai.com/). 30 | 31 | Let us know if you have a creative approach to displaying and reusing the material by [submitting an issue](https://github.com/opensciency/sprint-content/issues). Please provide your contact details so we can add you to the contributors list. 32 | 33 | ## Contributors ✨ 34 | 35 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 |
Yo Yehudi
Yo Yehudi

🧑‍🏫
Natasha Batalha
Natasha Batalha

🧑‍🏫
Shilaan Alzahawi
Shilaan Alzahawi

🧑‍🏫
Sara
Sara

🧑‍🏫
Cameron
Cameron

🧑‍🏫
James Powell
James Powell

🧑‍🏫
Daniela Saderi
Daniela Saderi

🖋
smhall97
smhall97

🖋
Jannatul Ferdush
Jannatul Ferdush

🖋
Flavio Azevedo
Flavio Azevedo

🖋
Chris Erdmann
Chris Erdmann

🧑‍🏫
Yuhan (Douglas) Rao
Yuhan (Douglas) Rao

🖋
Batool Almarzouq
Batool Almarzouq

🖋
Esther Plomp
Esther Plomp

🖋
TomoCoral
TomoCoral

🖋
Melissa Black
Melissa Black

🖋
Malvika Sharan
Malvika Sharan

🖋
Saranjeet Kaur
Saranjeet Kaur

🖋
Michel Lacerda
Michel Lacerda

🖋
Ismael-KG
Ismael-KG

🖋
andreamedinasmith
andreamedinasmith

🖋
aosman12
aosman12

🖋
Elio Campitelli
Elio Campitelli

🖋
Stephen Klusza
Stephen Klusza

🖋
Mariana Meireles
Mariana Meireles

🖋
Pauline Karega
Pauline Karega

🖋
Anne Fouilloux
Anne Fouilloux

🖋
Reina Camacho Toro
Reina Camacho Toro

🖋
Sierra V. Brown
Sierra V. Brown

🖋
Shamsudddeen Hassan Muhammad
Shamsudddeen Hassan Muhammad

🖋
Johanna Bayer
Johanna Bayer

🖋
Hugh Shanahan
Hugh Shanahan

🖋
MiguelSilan
MiguelSilan

🖋
Elli Papadopoulou
Elli Papadopoulou

🖋
dunldj
dunldj

🖋
Ana Vaz
Ana Vaz

🖋
Tyson L. Swetnam
Tyson L. Swetnam

🖋
Babatunde Valentine Onabajo
Babatunde Valentine Onabajo

🖋
Taher Chegini
Taher Chegini

🖋
ee2110
ee2110

🖋
rebeccaringuette
rebeccaringuette

🖋
Mayya
Mayya

🖋
98 | 99 | 100 | 101 | 102 | 103 | 104 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! 105 | -------------------------------------------------------------------------------- /ethos-of-open/CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | List alphabetically by last name, first 2 | 3 | Last Name, First Name 4 | Affiliation 5 | Full ORCID link 6 | Full GitHub link 7 | Full Twitter link 8 | 9 | 10 | Tomoko Tomo Bell 11 | University of Guam 12 | 0000-0003-4606-6307 13 | TomoCoral 14 | 15 | Ismael Kherroubi Garcia 16 | Open Life Science and Royal Society of Arts, Manufactures and Commerce 17 | 0000-0002-6850-8375 18 | Ismael-KG 19 | hermeneuticist 20 | 21 | Amber Osman 22 | DOAJ 23 | 0000-0003-1198-7843 24 | aosman12 25 | amb3r12 26 | 27 | Miguel Silan 28 | Annecy Behavioral Science Lab; Université Lumière Lyon 2 29 | 0000-0002-7480-3661 miguelsilan 30 | MetaMethodsPH 31 | 32 | Yo Yehudi 33 | Open Life Science 34 | 0000-0003-2705-1724 35 | yochannah 36 | yoyehudi 37 | 38 | Shamsuddeen Muhammad 39 | Bayero University, Kano 40 | 0000-0001-7708-0799 41 | shmuhammad2004 42 | shmuhammadd 43 | -------------------------------------------------------------------------------- /ethos-of-open/README.md: -------------------------------------------------------------------------------- 1 | # Ethos of Open Science: Introduction 2 | 3 | By the end of this module, learners will be familiar with the definitions central to open science and have explored some concrete examples of the benefits of open science principles and practices. The course will include best practices for building open science communities, increasing collaboration, and introducing open principles to project design, as well as an overview of open science norms. 
This module will also explore the historical impact of “closed” science, and how open science seeks to create a more diverse and equitable scientific community. 4 | -------------------------------------------------------------------------------- /ethos-of-open/definitions.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ethos-of-open/files/placeholder.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ethos-of-open/img/image2_5_schools_of_open_science.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/ethos-of-open/img/image2_5_schools_of_open_science.png -------------------------------------------------------------------------------- /ethos-of-open/img/image_3_lesson_5_open_science_skills.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/ethos-of-open/img/image_3_lesson_5_open_science_skills.png -------------------------------------------------------------------------------- /ethos-of-open/lesson4-impact-of-open.md: -------------------------------------------------------------------------------- 1 | # Lesson 4: The Where 2 | 3 | ## Impact of Open Science on academia, communities and society as a whole: Where open science happens. 4 | 5 | We have so far explored the fundamental parts of what Open Science is: why to pursue it and who the stakeholders of open research are. Where you are in the world when performing open science can have an impact on how you perform it, too. 
Laws vary across the world, and one advantage of open science is that people from around the world can participate, co-create, and consume content together. This can affect your work from social and legal perspectives, and may present technical challenges as well. 6 | 7 | ### Legal frameworks that affect responsible Open Science 8 | Open Science promises to make research work more accessible, all-encompassing, participatory, understandable and re-usable for wider audiences. Keep in mind, making the process open does not in itself result in wide participation unless it's partnered with sufficient financial resources, technological advancements, knowledge and skills. It's important that all these are available across regions, institutions and socio-demographics (review by Ross-Hellauer et al., 2022). 9 | 10 | ### Data protection, privacy, and data sovereignty 11 | **⚠️Caution:** To perform open science responsibly, it is important to consider not only what you should share, but also what not to share. 12 | 13 | Individuals may have a right to privacy in their communications, for medical records, and for their physical locations. Similarly, certain countries, communities, and especially Indigenous peoples may historically have been exploited, and may wish to retain more rights over their knowledge to protect from further exploitation. There are laws around the world that may cover some of these issues, but not all countries and regions have equal levels of protection, and some have none at all. 14 | 15 | We share some case studies: 16 | 17 | #### European case: General Data Protection Regulation 18 | There are protective laws and legal frameworks in certain places around the globe that affect open science. European researchers have to abide by the General Data Protection Regulation (GDPR), and as a result often make data sharing statements declaring that their data are not available for sharing. This hinders sharing particular data. 
Here, the scientific community should come forward to allow responsible Open Science data sharing possibilities in the global scientific space (Ursin & Bentzen, 2021). 19 | 20 | #### South African case: Protection of Personal Information Act (POPI Act) and Open Science 21 | 22 | The POPI Act No. 4 of 2013 is a regulation by the government of South Africa to safeguard the personal information of South African citizens, like the General Data Protection Regulation (GDPR) in Europe. The regulation states that if you are obtaining personal information of South African citizens through phones, focus groups, or interviews, containing identifiers such as names or contact information, then you have to be POPI Act compliant. 23 | 24 | In the research context, one needs to make sure that if personal identifiers are collected, they are not shared with third parties and are stored securely in an access-controlled location to prevent a data breach. The act doesn't impede open data sharing, but personal identifiers should be removed from shared datasets. The POPI Act affects the research process by requiring that only de-identified datasets are stored on cloud and onsite data storage, and that access to them is strictly limited to specific designated individuals, to ensure data safety (POPIA Code of Conduct for Research, 2021). 25 | 26 | #### United States case: 27 | 28 | In the United States, there is no federal-level legislation similar to POPI or GDPR, but there are some state-level laws, such as the California Privacy Rights Act and the Virginia Consumer Data Protection Act. 29 | 30 | **📝 Exercise:** Check what laws, if any, apply in your state. 31 | 32 | #### Summary: Working in a global society with varied data protection laws 33 | 34 | Given the broad variation of data protection laws around the world, it may seem tricky to navigate. By practicing responsible Open Science, however, our response can get a little bit clearer. 
We can consider relevant legislation (if any) to be a bare minimum, and instead ensure that we are involving relevant stakeholders, as discussed in lesson 4, and listening to their needs respectfully, even if it means we are more cautious than local legislation may require. 35 | 36 | ### Whose laws apply to my community? 37 | 38 | Social, cultural, and legal norms will vary from country to country, and international communities. Avoiding culture clashes can be made more manageable by setting out explicit cultural norms for your community, such as may be specified in a code of conduct, which we discussed in lesson one of this module. Try to avoid assumptions that tie to a specific physical location or culture. Some examples why this is important: 39 | 40 | - Laws are not uniform. If activity X is legal to do in one country, but not another, a code of conduct which says "obey the law" becomes impossible to interpret fairly or to enforce. 41 | - Hosting a conference in a country that doesn't have strong human rights records might result in someone breaking the law by being LGBTQIA+, or by not wearing religious garb. 42 | - "We plan to release this in the summer" might be clear if you're all in the same country, but if your collaboration is spread across the northern and southern hemisphere, is summer in the middle of the year or the end of the year? Consider using a month name instead - "we plan to release this by March" is unambiguous. 43 | 44 | ## Equity and Open Science 45 | Many countries in Asia, Africa and Latin America face many challenges, including lack of funding, inadequate access to literature and poor infrastructure. Across these regions, young scientists are working to build practices for open science from bottom-up. The aim is that scientific communities will incorporate these principles as they grow but these communities' needs differ from those that are part of mature research systems. 
46 | 47 | The reasons for falling behind are lack of funding, poor infrastructure, and inadequate access to research resources. There are also government policies that prioritize greater productivity at the expense of quality. Open science collaborations can bridge the gap for developing countries by providing new ways to collaborate and giving researchers access that might currently be out of reach (Onie, S. 2020). 48 | 49 | #### Equitable terminology: what words should we use? 50 | 51 | When talking about equity from a global perspective, it can be very hard to choose appropriate language, and historically many phrases have come and gone as we learn more equitable ways to communicate. Common phrases you may see include "Higher Income Country" and "Lower or Middle Income Country". These are terms defined by the World Bank. Some people prefer to use "Global North" when referring to more privileged / high income countries, and "Global South" for lower income / more exploited and marginalized countries - but some "Global South" countries are in the northern hemisphere, and vice versa! Other times, people use "minority" and "majority", but again sometimes the phrase "minority" might be used for a populace that is not actually a minority! An older phrase is "first world country" or "third world country". Many of these terms also have accidental or intentional negative connotations. For this module, we aim to use the phrases "marginalized" and "privileged" when referring to the inequitable distributions of resources and power amongst humanity. 52 | 53 | The Global North has ascendancy over authorship and synergies in research networks, which marginalizes the Global South (Cash-Gibson L et al 2018). 
54 | 55 | In richer regions, a compulsion for the goal of excellence nurtures cumulative benefit in funding allocation for the highest funded institutions (Noble P et al). 56 | Across many countries, very few women hold higher positions; they attain senior positions at a later age, receive less grant funding, and few have high-impact publications (Gesiarz F et al 2020) 57 | (Brown JVE et al 2020). 58 | These are the inequities that make up the societal imbalances (Zuckerman H. 1988). Open Science aims to minimize these societal imbalances in order to elevate underrepresented societies and groups, and to create avenues for Global South countries to come forward & contribute to the global science community. 59 | 60 | Prainsack & Lionello (2018) stated that open science is a political assignment greater than its technological part. The Open Science policy in Europe is shifting across nations, institutions & funding organizations (Sveinsdottir T et al 2020). 61 | The emphasis on policies drives the incentive/reward structures and resource allocation, and later helps in establishing strategies. Open Science started as a bottom-up approach by researchers but has risen to the top level, making its way into national and institutional policies that set wider goals like economic growth. The European Commission favors Open Science, but a 2016 EU publication raises the concern that the perceived potential of Open Science for fostering Europe's competitive advantage in global markets is being given greater importance (link to EU publication, 2016). 62 | Open Science is positioned to cover literature in languages other than English, supporting the value of 📖bibliodiversity📖. We see a diverse set of communities in organizations working on Open Science data, software, tools, and resources together as multilingual teams covering different languages of the world. Research indicates that there is a demand for regionally focused titles, in regional languages (Snijder 2022). 
63 | 64 | ## A global perspective on open science 65 | ### UNESCO on Open Science Infrastructure 66 | 67 | UNESCO's recommendation on Open Science states the potential of open science is in minimizing the present inequalities in Science, Technology and Innovation and pace towards SDGs 2030 implementation agenda, specifically in Africa, least developed countries, small island developing states and landlocked developing countries. 68 | 69 | Open Science infrastructures are shared infrastructures (referred as virtual/physical, knowledge-based resources such as journals, collections, and open access publication platforms, archives, repositories, scientific data, present research informations systems, sets of instruments, open bibliometrics, scientometrics systems for assessing & analyzing scientific areas, open computational & data manipulation service infrastructures, multidisciplinary data analysis & digital infrastructures) where open science happens and serves the needs of diverse communities. Please see UNESCO Recommendation on Open Science 70 | 71 | UNESCO on Open Science policies clearly recommends monitoring Open Science through combining qualitative and quantitative methods to assess the efficacy and efficiency of Open Science as per the member states' particular conditions, constitutional structures and constitutional provisions. Also, gathering & communicating progress, good practice, research work & innovation in open science and its outcomes with support of UNESCO and diverse stakeholders approach. 72 | 73 | ### Organisation for Economic Co-operation and Development (OECD) and Open Science 74 | 75 | The OECD's recommendation regarding research data from public funding helped gain collaboration and global sharing of data as a policy priority, with the objective of making the global science system more effective and seamless. 
There has been progress in a number of OECD member states and partner economies, with 58 countries successfully delineating their policies for open data & research publications. 76 | - For IT infrastructure, academic institutions and data repositories, international networks have been established in the form of repository networks such as OpenAIRE. 77 | - "Science clouds" - national and international computational resources - are being initiated such as European Open Science Cloud, the Australian cloud NECTAR, the National Research Data Infrastructure in Germany, the National Institutes of Health Data Commons in the USA & Research Center for Open Science and Data Platform in Japan. 78 | 79 | ### Questions/Reflection: 80 | 81 | - What strengths do marginalized communities bring to open science? What challenges may they face compared to privileged communities? 82 | - Name at least one data privacy law, and describe ways you can keep personal data safe. Do all countries have data privacy laws? 83 | - Bonus: You're working on an open science consortium that gathers data in the Netherlands, Kenya, and India. You plan to use servers in the EU to store your data. What concerns should you take into account? 84 | 85 | -------------------------------------------------------------------------------- /open-data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-data/.DS_Store -------------------------------------------------------------------------------- /open-data/Appendix.md: -------------------------------------------------------------------------------- 1 | # Appendix: Finding Open Data 2 | 3 | The reusability of openly shared data relies on it being found in the first place; therefore, data findability is a key step in accessing and utilizing data. 
There are three major ways to find Open Data that are shared by researchers – repository, web search, and literature search. 4 | 5 | ### Repositories 6 | 7 | Ideally, Open Data should be available in repositories where the datasets are properly indexed and assigned a unique persistent identifier (as discussed in **Lesson 6 – Sharing Open Data**) thereby ensuring the data is unambiguously identifiable, searchable, discoverable along with associated metadata and documentations. 8 | 9 | Therefore, the first step in finding Open Data related to your field is to identify discipline specific repositories (if there are any) and search for datasets there (see **Lesson 6.4 – Repositories and Other Sharing Methods**). 10 | 11 | Find repositories in your field: 12 | 13 | 14 | 15 | * _[Re3data.org](http://re3data.org) is a global registry of research data repositories that covers research data repositories from different academic disciplines._ 16 | * _[FAIRsharing](https://fairsharing.org/) is a curated, informative, and educational resource on data and metadata standards, inter-related to databases and data policies._ 17 | * _Recommended repositories by publishers (e.g., Recommended Data Repositories suggested by [Scientific Data](https://www.nature.com/sdata/policies/repositories#envgeo) and[ PLOS One](https://journals.plos.org/plosone/s/recommended-repositories))_ 18 | * _[World Data System](https://www.worlddatasystem.org/) represents a network of repositories._ 19 | 20 | _Examples of generic repositories:_ 21 | 22 | 23 | 24 | * _[Zenodo](https://zenodo.org/)_ 25 | * _[Mendeley Data](https://data.mendeley.com/)_ 26 | * _[Figshare](https://figshare.com/)_ 27 | * _[Dryad](https://datadryad.org/stash)_ 28 | 29 | The[ Generalist Repository Comparison Chart](https://zenodo.org/record/3946720#.YUKQ18RS-Uk) is a tool you can use to decide where to store and share their FAIR data outside of their institutional repositories. 
Dataverse has also published a[ comparative review of eight data repositories.](https://dataverse.org/blog/comparative-review-various-data-repositories) 30 | 31 | 32 | ### Web-searches 33 | 34 | To explore a wide variety of datasets from projects or popular topics, the use of a more general search engine can be helpful. Some disciplines or large institutions such as NASA and the National Institutes of Health’s National Center for Biotechnology Information (NCBI) offer their own portal where you can search for their datasets, related publications and oftentimes tools for analysis (e.g., EMBL's European Bioinformatics Institute[ https://www.ebi.ac.uk/](https://www.ebi.ac.uk/) ). There are also an increasing number of international and national data portals to enable data discoveries. 35 | 36 | ### **Generic data search portals:** 37 | 38 | * Google[ https://datasetsearch.research.google.com/](https://datasetsearch.research.google.com/) 39 | * Kaggle[ https://www.kaggle.com/datasets](https://www.kaggle.com/datasets) 40 | * Wikidata[ https://www.wikidata.org/wiki/Wikidata:Main_Page](https://www.wikidata.org/wiki/Wikidata:Main_Page) 41 | * Open Data Network [https://www.opendatanetwork.com/](https://www.opendatanetwork.com/) 42 | * Awesome Public Datasets[ https://github.com/awesomedata/awesome-public-datasets#readme](https://github.com/awesomedata/awesome-public-datasets#readme) 43 | 44 | ### **Examples of Discipline specific:** 45 | 46 | * NASA Earth[ https://www.earthdata.nasa.gov/](https://www.earthdata.nasa.gov/) 47 | * CERN[ https://opendata.cern.ch/](https://opendata.cern.ch/) 48 | * NCBI National Center for Biotechnology Information[ https://www.ncbi.nlm.nih.gov/](https://www.ncbi.nlm.nih.gov/) 49 | * EMBL's European Bioinformatics Institute[ https://www.ebi.ac.uk/](https://www.ebi.ac.uk/) 50 | * ICPSR[ https://www.icpsr.umich.edu/web/pages/](https://www.icpsr.umich.edu/web/pages/) 51 | * International Monetary Fund 
[https://www.imf.org/en/Data](https://www.imf.org/en/Data) 52 | * NOAA Climate Data Online [https://www.ncdc.noaa.gov/cdo-web/datasets](https://www.ncdc.noaa.gov/cdo-web/datasets) 53 | * Federal Reserve Economic Research [https://fred.stlouisfed.org/](https://fred.stlouisfed.org/) 54 | * USGS EarthExplorer [https://earthexplorer.usgs.gov/](https://earthexplorer.usgs.gov/) 55 | * Open Science Data Cloud (OSDC) [https://www.opensciencedatacloud.org/](https://www.opensciencedatacloud.org/) 56 | * NASA Planetary Data System [https://pds.nasa.gov/](https://pds.nasa.gov/) 57 | 58 | 59 | ### **Examples of National or international data portal** 60 | 61 | * US Federal data[ https://data.gov/](https://data.gov/) 62 | * EU Data Portal[ https://data.europa.eu/en](https://data.europa.eu/en) 63 | * WHO[ https://apps.who.int/gho/data/node.home](https://apps.who.int/gho/data/node.home) 64 | * THE WORLD BANK [https://data.worldbank.org/](https://data.worldbank.org/) 65 | * DATA.GOV.UK [https://www.data.gov.uk/](https://www.data.gov.uk/) 66 | * UNICEF [https://data.unicef.org/](https://data.unicef.org/) 67 | 68 | 69 | ### Literature search 70 | 71 | While not ideal, datasets are often attached to scholarly publications in the form of supplementary material, or referenced in text where to find them e.g. GitHub repository or personal/institutional websites. In addition, there are emerging journals and special collections/issues focused on describing and publishing data (e.g. Nucleic Acids Research database issues[ https://doi.org/10.1093/nar/gkab1195](https://doi.org/10.1093/nar/gkab1195), Scientific Data, Earth System Science Data, etc.). In other words, while the datasets are openly available in these media, they are not properly indexed and therefore not very findable nor machine readable. 72 | 73 | Finding academic publications can be a challenge in itself depending on the discipline and field of study. 
For instance, in life science and biomedical research, there are a number of repositories and search engines (e.g. PubMed, EuropePMC) indexing research outputs (e.g. publications, abstracts, references and communications) from various journals. 74 | 75 | However in other disciplines (e.g. arts and humanities), search is often carried out with general search engines or research databases such as Google Scholar and JSTOR. In that case, it is advisable to reach out to library personnel and community members for further advice on where to find related literature and data, see lesson 5.4 Help section. 76 | 77 | **Generic:** 78 | 79 | * Google Scholar[ https://scholar.google.com](https://scholar.google.com) 80 | * Open knowledge map: A visual interface allowing the exploration of interconnected topics with relevant documents and concepts. [https://openknowledgemaps.org/](https://openknowledgemaps.org/) 81 | * JSTOR a wide range of scholarly content[ https://www.jstor.org/](https://www.jstor.org/) 82 | * ResearchGate[ https://www.researchgate.net/search](https://www.researchgate.net/search) 83 | 84 | **Discipline specific:** 85 | 86 | * EuropePMC Life sciences [https://europepmc.org/](https://europepmc.org/) 87 | * Pubmed biomedical literature [https://pubmed.ncbi.nlm.nih.gov/](https://pubmed.ncbi.nlm.nih.gov/) 88 | * arXiv is a free distribution service and an open-access archive for scholarly pre-prints in the fields of physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics [https://arxiv.org/](https://arxiv.org/) 89 | * Biorxiv Preprint server for biology [https://www.biorxiv.org/](https://www.biorxiv.org/) 90 | * EarthArXiv ([https://eartharxiv.org](https://eartharxiv.org)) and Earth and Space Science Open Archive ([https://essoar.org](https://essoar.org)) 91 | * ASAPbio provides a catalog of preprint servers 
[https://asapbio.org/preprint-servers](https://asapbio.org/preprint-servers) 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /open-data/Lesson2-Benefits.md: -------------------------------------------------------------------------------- 1 | # Lesson 2: Benefits of Open Data 2 | 3 | ## Learning Objectives 4 | - Communicate the benefits and challenges of Open Data and its effects on science 5 | 6 | ## Introduction 7 | In this lesson, we’ll discuss the benefits of open data and in particular its direct effect in advancing Open Science. We will also discuss details of how Open Data can impact the response of science in global emergencies, and how Open Data facilitates multidisciplinary work. 8 | 9 | ## 2.1 Open Data for the greater good 10 | As we mentioned earlier, data plays a significant role in our day-to-day lives. Open Data, in particular, has played a key role. If you pause and think about it, you may realize that Open Data is not only common in our society, but you might have benefited from it and used it yourself. 11 | 12 | Here are some notable examples of Open Data that have positively impacted society at large: 13 | 14 | Each country or territory often provides open access to a variety of socioeconomic information about the population, community, and business in its jurisdiction. These data are often called census survey data which may include the aggregated statistics of gender, race, ethnicity, education, income, and health data of a community. These data are often used to understand the composition of a local neighborhood and are critical to inform decisions on resource allocation to ensure the quality of life for the community. 15 | 16 | The changing climate poses a significant risk to our daily lives and has been responsible for intensifying drought, increasing flooding, and devastating fire incidents worldwide. 
Open data is therefore critical in providing life-saving information to adapt to the changing climate and help assess the climate risks of the place where we live. Government agencies (e.g., National Oceanic and Atmospheric Administration in the U.S., UK Met Office, European Centre for Medium-Range Weather Forecasts) have been providing public access to long-term weather and climate information for decades. A more recent initiative stems from organizations developing value-added open data products to advise society on the risk of changing climate. One recent example is the flood and fire risk data for the United States developed by the non-profit organization First Street Foundation. 17 | 18 | ## 2.2 Open Data for better Open Science 19 | Scientific discovery and innovation stand to gain a tremendous amount from Open Data. This impact stems directly from the multiple inputs and methods developed for investigating problems. Specifically, three core components of Open Data drive this diverse scientific innovation and provide enormous societal and scientific benefits: 20 | 21 | ### Validation: 22 | 23 | Open Data that is easily accessible by other researchers allows for scrutiny, which helps discover mistakes more quickly and instills confidence that the research was conducted with sound and ethical principles and methods. Evidence-based progress is important in providing confidence in the scientific results and is important for the insights drawn to inform future research. 24 | 25 | Data that has been reviewed, maintained and scrutinized by many, as well as informed by diverse consultation, drives robust and thorough scientific pursuits. 26 | 27 | This validation process is a key component of reproducibility, which is important in building on prior research. Reproducibility is the cornerstone of pushing science forward, as it is the very baseline to check results and expand upon them by introducing new experiments and questions. 
28 | 29 | ### Transparency: 30 | 31 | Building on the idea of validation and scrutiny, transparency facilitates this process. It allows for early engagement with the data and ensures the data was collected with sound and ethical principles (these will be elaborated upon in lessons 3 (Responsible Open Data) & 4 (The CARE and FAIR principles)). 32 | 33 | This transparency allows for early intervention if there are unexpected harms. This is where the idea of multiple perspectives becomes important again. 34 | ### Collaboration: 35 | 36 | Open datasets are made available to all (see section Inclusivity in lesson 1) - which means new, robust insights are gathered at a faster pace as mistakes can be caught more easily, expensive data collection doesn’t need to be repeated, and researchers build upon the work of their peers. For example, scientists recently produced the first image of a black hole in our galaxy. This achievement was only possible through open collaboration and sharing of telescope data by different observatories distributed across different parts of the world [1]. 37 | 38 | The data isn’t limited to those within a specific field nor exclusive to those with institutional access. Importantly, this means the data can be shared with non-traditional academic researchers such as nurses, social workers, agronomists, journalists and other communities. This allows for researchers to also derive insights from varying perspectives. 39 | 40 | The scope of research can be easily expanded to derive more holistic insights. For example, the Coupled Model Intercomparison Project (CMIP) that started in 1995 paved the way to understand how climate change was impacting our daily lives by investigating factors such as malaria distribution in Africa, infrastructure and urban design as well as the implications of climate change on the risk of epilepsy [2, 3]. 
41 | 42 | Collating similar data sets and performing meta-analyses on those data sets can provide a substantially improved signal that would not be possible in any one of these data sets. Additionally, this facilitates convergence across scientific disciplines, increasing the value of the research. 43 | 44 | ## 2.3 Open Data to support policy change 45 | Open data can lead to policy change which directly impacts the lives of communities, such as those destined to suffer first from the slow changes to the Arctic. A study, taking advantage of the OpenStreetMap data [4], helped map projected changes in the Arctic. These mappings in turn helped emphasize the need for adaptation-based policies at community and regional levels to avoid stagnation of change in the light of a sudden and dramatically worse situation fueled by climate change. 46 | 47 | ## 2.4 Open Data in face of global emergencies 48 | The COVID-19 pandemic demonstrated to the world, in real-time, how the collective movement of researchers sharing their data (such as sharing of coronavirus genome data [5]) can lead to an unprecedented number of discoveries in a relatively short amount of time. This directly impacted radical vaccine development efforts and the timely control of the COVID-19 infection [6]. These insights will continue to pay off, with this research spurring future developments. 49 | 50 | Data sharing has many benefits and can aid access to knowledge. However, it is also important to bear in mind where the data has come from, who should have a say in its interpretation and use, and how the data can be shared responsibly, more on that in lessons 3 & 4. 51 | 52 | ## 2.5 Open Data and public engagement (citizen science) 53 | A citizen scientist is a citizen or amateur scientist that will collaborate with professional researchers to help gather data on a broader spatial and temporal scale than the researchers might be able to achieve on their own [7, 8]. 
This outsourcing of responsibility helps members of the public engage in scientific pursuits that ultimately benefit them and allows research to be conducted on a grander scale than might be possible with only professional researchers. Citizen science is gaining popularity, with increasing recognition as a valuable contribution to scientific advancements [9]. 54 | 55 | For example, volunteer citizen scientists in Beirut were recruited from 50 villages to help test water quality [10]. These volunteers were trained to be able to conduct the tests and in turn, not only was the data collected to inform the scientific advancements, but the citizen scientists also had the opportunity to learn to better manage their water resources and were able to improve conditions, creating a mutually beneficial interaction. 56 | 57 | ## 2.6 Open Data and decolonisation of knowledge 58 | Free distribution of knowledge gives rise to increased participation in science. Open Data is central to fostering science that is inclusive and diverse, with direct and relevant benefits to impacted individuals and communities. This fostering is particularly important in the mission towards the decolonisation of knowledge [11]. 59 | 60 | In a world where knowledge can be a commodity, with currency in the form of published papers and hoarded datasets, exclusion from research can limit progress and negatively impact a community’s progress in a world driven by a knowledge-based economy. 61 | 62 | Open Data, and its positive side effect of decolonisation of knowledge, promotes and benefits from diverse perspectives through purposeful inclusion of African, Latin American and other underrepresented Low and Middle Income Countries. This inclusion allows a dramatic change in who has access to work with and reuse data. 63 | 64 | It can also become a powerful tool in the fight for visibility and credit. 
By fostering a global research culture of transparency and validation, where the work of underrepresented groups is celebrated and compensated, such as giving credit or much needed vaccines in exchange for the world-class genome sequencing in Africa, we will create a sustainable model that ensures under-represented countries are able to keep contributing towards a global revolution for example against infectious disease. It also gives marginalized groups such as women, under-represented communities, indigenous scholars, non-Anglophone scholars, as well as scholars from less-advantaged countries a voice in how the global and nuanced narrative of science is developed. This broad scale participation and inclusion shows respect to the involved people and communities and helps raise the profile of the research through considerate inclusion. 65 | 66 | Having said that, Open Data has been demonstrated to further marginalize or exploit small-scale and community driven initiatives, such as in the case of African researchers neither receiving due credit nor compensation for their genome sequencing during the COVID-19 pandemic [12]. This is further explored in the next section as we introduce ways of mitigating harms that could happen via unthoughtful and irresponsible sharing of data. 67 | 68 | ## Summary 69 | Open Data which is purposefully inclusive and open to scrutiny, benefits scientific innovation by allowing for a more diverse and robust scientific process that draws on multiple perspectives. This also allows for the early identification of mistaken insights as well as early intervention for unforeseen harms to impacted communities. 70 | 71 | Open Data allows non-traditional researchers to contribute to scientific development and bring their unique insights to the table. 
With these benefits in mind, we should always bear in mind that Open Data requires careful consideration of the possible downsides of making data open without due credit and consultation with potentially vulnerable and/or marginalized communities. The next lesson discusses important considerations for the responsible management, collection and use of open data by all stakeholders. 72 | 73 | ## Assessment 74 | Can you think of any examples where opening data might help you answer a question, or a question that will impact your community? 75 | 76 | ## References 77 | 1. https://eventhorizontelescope.org/ 78 | 2. https://oceanrep.geomar.de/id/eprint/12875/1/CMIP.pdf 79 | 3. https://doi.org/10.1002/epi4.12359 80 | 4. https://www.openstreetmap.org/#map=5/54.910/-3.432 81 | 5. [https://www.nature.com/articles/d41586-021-00305-7#:~:text=Other%20researchers%20say%20that%20restrictions,while%20protecting%20data%20providers](https://www.nature.com/articles/d41586-021-00305-7#:~:text=Other%20researchers%20say%20that%20restrictions,while%20protecting%20data%20providers) 82 | 6. https://www.nature.com/articles/d41586-020-01246-3 83 | 7. https://www.oed.com/view/Entry/33513?redirectedFrom=citizen+scientist#eid316597459 84 | 8. https://en.unesco.org/science-sustainable-future/open-science/recommendation 85 | 9. https://ecsa.citizen-science.net/ 86 | 10. https://www.idrc.ca/en/book/contextualizing-openness-situating-open-science 87 | 11. https://zenodo.org/record/3946773#.YsFyqHbMJPb 88 | 12. 
https://www.nature.com/articles/d41586-021-01194-6 89 | -------------------------------------------------------------------------------- /open-data/Lesson3-Responsible.md: -------------------------------------------------------------------------------- 1 | # Lesson 3: Responsible Open Data 2 | 3 | ## Learning Objectives 4 | - Recognize open data that is created responsibly 5 | - Appreciate how to use data responsibly 6 | 7 | 8 | ## Introduction 9 | Data is a precious resource that should be shared whenever possible. As demonstrated in the previous lesson, dramatic improvements can arise from Open Data and the decolonisation of knowledge by ensuring data is open and available to all. 10 | 11 | While Open Data benefits science in wonderful ways and already provides enormous benefits to society, the misuse and inconsiderate sharing of data can have far-reaching harmful effects. There may also be cases where the research data should not be collected nor shared publicly out of respect for the legal frameworks and communities' needs. Understanding these potential harms requires reflection on the part of the research team and consultation with people and communities impacted by the research. 12 | 13 | In this lesson, we introduce the concept of Responsible Open Data. These are points for consideration when thinking about making data open and managing it once it is open, as well as elaborating on ways for providing impacted communities the opportunity to drive the scientific narrative and the direct impact on their lives. In the next lesson, we will discuss a framework for actively engaging in and actioning these considerations in your research (CARE principles in lesson 4 - CARE and FAIR principles). 14 | 15 | ## 3.1 Empowering Individuals and Communities through Open Data 16 | 17 | The needs of marginalized and underrepresented communities can be, and have been, ignored with respect to Open Data. 
Communities that are the participants, or the main drivers of some types of data collection tend to be invisible when it comes to publishing as credit is taken by the bigger academic or institutional researchers. 18 | 19 | Some of the notable factors that contribute to the exploitation of marginalized and underrepresented communities, oftentimes leading to disastrous outcomes including inappropriate use and sharing of data, include: 20 | 21 | ### Lack of protective frameworks: 22 | There are instances where it might not be appropriate to share data openly. For example, there are legal frameworks on a regional, national and international level to take into account; however, these might not always be sufficient to protect contributors and communities from exploitation. It is also important to note that there may be instances where no such frameworks exist, and people as contributors to the content of the data might be open for exploitation. In any case, whether a framework exists or not, careful, frequent, and ongoing communication and direct involvement of communities/contributors in any data decisions is needed, or a blanket ban should be assumed where consultation is not feasible. 23 | 24 | ### Lack of proper informed consent: 25 | Informed consent is an essential step in ethical research practices and is a responsibility for researchers to fulfill before the research takes place. Informed consent allows participants to participate fully, with a complete understanding of the research, without coercion or undue influence. This consent can be withdrawn at any time, without consequence [[1](https://researchsupport.admin.ox.ac.uk/governance/ethics/resources/consent#:~:text=Informed%20consent%20is%20one%20of,before%20they%20enter%20the%20research.)]. 
While informed consent is an exceptionally important component of science, and of open science in general, the exact requirements for obtaining it are highly discipline-specific, and understanding these nuances is beyond the scope of this work. 26 | 27 | With this in mind, it is important to understand that even if one has obtained true informed consent, it is not a once-off action. It requires consultation and education. This is important in the context of data being put online for use and reuse - especially seeing as research and its impact change over time, and as such, communities could be opened up to unexpected harms in the future. Therefore measures need to be in place so that this consent can be withdrawn or altered without consequence to the communities at risk. Participants' understanding needs to be ensured; what a lack of understanding can lead to is illustrated by the consent form of the open data 1000 Human Genomes consortium [[2](https://www.internationalgenome.org/sites/1000genomes.org/files/docs/Informed%20Consent%20Form%20Template.pdf)]: the form contains a passage that most participants don’t catch, and by agreeing to have their blood samples used for an unlimited supply of DNA they open themselves to biocolonialism. 28 | 29 | 30 | ### Lack of equitable participation: 31 | Open Data that is shared with due consideration and consultation allows impacted communities to take charge and guide research in a way that best suits their narrative, values and needs. It allows more autonomy in these communities to further their scientific development and to contribute to the larger field of open science. 32 | 33 | ## 3.2 Managing Research Data responsibly 34 | Many research disciplines work with personal data that can be used to identify an individual (see [[3](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-personal.html)]). This type of data cannot be shared easily, as data should be anonymized before doing so, and this is increasingly difficult given the current rapid pace of technological development. 
New technical advances may make it easier to recombine datasets and re-identify individuals. Some individuals or communities are more susceptible to exploitation, as described earlier. 35 | 36 | The accidental detrimental effects of Open Data may extend beyond individuals and affect others, e.g. endangered species or natural resources that should be protected [[4](https://doi.org/10.1038/s41559-018-0608-1)]. For example, the local extinction of Goniurosaurus luii (Chinese cave geckos) in Vietnam was attributed to poaching activities which occurred shortly after data related to their discovery was published; this, in turn, prompted a call for scrutinizing Open Data sharing practices in the field of biodiversity [[5](https://doi.org/10.1126/science.aan1362)]. 37 | 38 | Additionally, research can be carried out in collaboration with industry, generating commercially sensitive data, which may place restrictions on what can be shared. Research can be used for harmful purposes (see Ethos, lesson 2) or pose a risk to (inter)national security. 39 | 40 | There are several tools available that will help you make decisions about what you can share publicly: 41 | 42 | * CARE and FAIR principles (lesson 4) 43 | * (inter)national laws that apply to data sharing (lesson 6 - Sharing Open Data) 44 | * Guidelines/policies set up by your discipline or research institute (lesson 6 - Sharing Open Data) 45 | * License restrictions (lesson 6 - Sharing Open Data) 46 | 47 | ## Summary 48 | In summary, you may not always be able to share the research data openly and there may be other responsibilities that are associated with managing the data if it has been made open. In such instances, the focus is placed on controlled and limited access with reuse in mind. 49 | 50 | The CARE principles, presented in the next lesson, provide a framework for responsibly collecting data with all stakeholders in mind. 
The FAIR (Findable, Accessible, Interoperable, Reusable) principles, also described in the next lesson, provide guidelines for this and allow you to share part of the data without necessarily disclosing all the data. 51 | 52 | ## Assessment 53 | 54 | * Can you think of a specific example in which releasing data could lead to harm? Which people and/or communities might you consult to determine this and discuss remedies? 55 | 56 | * Example of how one can re-identify a person from shared data? 57 | 58 | ## References 59 | 60 | 1. https://researchsupport.admin.ox.ac.uk/governance/ethics/resources/consent#:~:text=Informed%20consent%20is%20one%20of,before%20they%20enter%20the%20research. 61 | 2. https://www.internationalgenome.org/sites/1000genomes.org/files/docs/Informed%20Consent%20Form%20Template.pdf 62 | 3. https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-personal.html 63 | 4. https://doi.org/10.1038/s41559-018-0608-1 64 | 5. https://doi.org/10.1126/science.aan1362 65 | -------------------------------------------------------------------------------- /open-data/Lesson4-CARE&FAIR.md: -------------------------------------------------------------------------------- 1 | # Lesson 4: CARE & FAIR Principles 2 | ## Learning Objectives 3 | - Recognise the relationship between FAIR, CARE and Open Data 4 | 5 | 6 | ## Introduction 7 | 8 | In the previous lesson on Responsible Open Data, we acknowledged that you may not always be able to share the research data openly. This lesson will introduce you to two sets of principles that provide a framework for responsible open data. The CARE principles may help you to responsibly collect and share data. If you are able to make (part of) the data openly available, it is helpful to do this in a manner that facilitates reuse by yourself and others. The FAIR principles provide guidelines for this, and allow you to share part of the data without necessarily disclosing all the data. 
After this lesson, you’ll be able to understand the relationship between FAIR, CARE and Open Data. 9 | 10 | ## 4.1 CARE Principles of Indigenous Data Sovereignty 11 | The CARE Principles of Indigenous Data Sovereignty apply whenever you’re collecting data with, or data that belong to, a particular community. The CARE principles are people- and purpose-oriented, and were originally set up to use data in a way that advances data governance and self-determination among Indigenous Peoples [[1](http://doi.org/10.5334/dsj-2020-043)]. The principles are applicable to any research that involves communities or local stakeholders and cover: 12 | 13 | * **Collective Benefit**: data must facilitate collective benefit to achieve inclusive development and innovation, improve governance and citizen engagement, and realize equitable outcomes. 14 | * **Authority to control**: Recognition of the rights of (Indigenous) communities to govern data 15 | * **Responsibility**: nurture respectful relationships with the communities from whom the data originate 16 | * **Ethics**: requires representation and participation of Indigenous Peoples, who must be the ones to assess benefits, harms, and potential future uses based on community values and ethics. 17 | 18 | The [Global Indigenous Data Alliance](https://www.gida-global.org/care) has made further resources available and translated the CARE principles into other languages [[2](https://www.gida-global.org/care)]. The genomic research community has also worked on a framework for enhancing ethical genomic research with Indigenous communities [[3](https://doi.org/10.1038/s41467-018-05188-3)]. 
19 | 20 | Indigenous scientists have already written extensively of the harms visited upon indigenous communities through promises of medical benefits that have never materialized and sharing of genomic data without tribal consent [[4](https://doi.org/10.1038/s41576-019-0161-z), [5](https://doi.org/10.1080/15265161.2021.1891347), [6](https://doi.org/10.1038/d41586-021-00758-w)]. Whenever you are handling data that belongs to an indigenous or other under-served community, the CARE principles are more important than the benefits of Open Data. Developments are currently underway to provide practical guidelines or ways to assess whether the CARE principles have been followed throughout the research process. 21 | 22 | The CARE principles are complementary to the FAIR principles which were developed to facilitate data sharing practices. 23 | 24 | 25 | ## 4.2 FAIR (Findable, Accessible, Interoperable, Reusable) 26 | The FAIR principles for scientific data management and stewardship are guidelines to improve the Findability, Accessibility, Interoperability and Reusability of digital assets [[7](https://doi.org/10.1038/sdata.2016.18)]. A dataset that is FAIR is not necessarily Open. The phrase “as open as possible, as closed as necessary” [[8](https://ec.europa.eu/research/participants/data/ref/h2020/grants_manual/hi/oa_pilot/h2020-hi-oa-data-mgt_en.pdf)] is often used to describe the interaction between the principles. Thus a dataset describing fishery locations might not be open (due to the harm caused by illegal fishing), but could be FAIR with a rich metadata record available and an identifying persistent ID. Datasets can be FAIR, but closed, because of personal data or because they fall under other ethical precepts that would mean opening them would be harmful (Lesson 3 - Responsible Data). 
27 | 28 | The [FAIR Data Principles](https://www.go-fair.org/fair-principles/) emphasize both human and machine readability and machine-actionability for data as research becomes more dependent on computation and automation [[9](https://www.go-fair.org/fair-principles/)]. For example a PDF version of a spreadsheet is human readable, but it is not easily used by machines. A better format for both humans and machines would be a structured data format like CSV or XML. 29 | 30 | ### FAIR principles explained 31 | * **Findable**: It is important that data is not only open but also Findable, by you and others in your field. If people from your community of practice can not find it, it will not be used frequently and its value will decline over time. Depositing your data in repositories will preserve it over time (see Lesson 6, Sharing Open Data for more on repositories) and assign datasets with a persistent identifier (PID). Sharing data using a data repository will ensure that data are uniquely identifiable, and searchable. Another aspect that helps with searchability is having robust documentation (sometimes called data dictionaries/codebooks, metadata or a README file). Images, large files and binary data are examples of data that can not be searched by machines or humans. Providing metadata that is searchable is particularly important in these cases [[10](https://doi.org/10.5281/zenodo.6532282)]. 32 | 33 | * **Accessible**: Once someone has found your data, they should be able to access the data using standardized mechanisms (e.g. https). Your data should be accessible (both retrievable and understandable) for both humans and machines. In other words, specify what the users need to do to access this data, and ideally, a machine can automatically translate those requirements and act on it (such as two factor authentication or request access from the author). Accessible does not equate to open. 
If the full content can not be made openly available, the metadata can be made openly available [[10](https://doi.org/10.5281/zenodo.6532282)]. 34 | 35 | * **Interoperable**: During reuse, data may need to be integrated with other data, allowing machines and humans to interpret and use the data in different settings. Metadata must be detailed enough for data to be understood, especially by those who do not own or create the data in the first place. Keep in mind that people can have a hard time interpreting another person - some words can be different in spoken and formal languages; things get lost in translation, and many different terms can describe the same object. The same word can even have different meanings across various disciplines. The use of controlled terminologies, vocabularies, and ontologies for interoperability helps ameliorate otherwise substantial barriers to interoperability [[10](https://doi.org/10.5281/zenodo.6532282)]. 36 | 37 | * **Reusable**: To be reusable, data and collections should have a clear usage license and provide accurate information on provenance. Provenance metadata provides context and details on the history of the source and its authenticity. Credit attribution (citation) is another important aspect to consider with regard to (re)usability and “paying it forward” to the researcher who released their data [[10](https://doi.org/10.5281/zenodo.6532282)], more on that in lesson 6 (Sharing Open Data). 
38 | 39 | ## Summary 40 | ### FAIR in short 41 | **Make your data as FAIR as possible by:** 42 | 43 | * Depositing your data in a repository that can: 44 | * Assign a PID 45 | * Make sure the metadata will always be available even if the data isn’t 46 | * Using a standard data format for your domain 47 | * Assigning an appropriate license to your dataset 48 | * Describing your data as richly as possible 49 | * Remembering that FAIR is not FAIR without due CARE 50 | 51 | It is easier to adhere to the CARE and FAIR principles when you plan for this at the start of your research, the topic of the next lesson. 52 | 53 | 54 | ## Assessment 55 | 56 | * Consider a dataset that you contributed to. Have you followed the CARE/FAIR principles? Which of the principles can you incorporate in your workflow? 57 | 58 | * When you reviewed datasets generated and shared by other researchers, were they following the CARE/FAIR principles? What did they do well and where could they improve? 59 | 60 | Want to do a more extensive assessment on your knowledge of the FAIR principles? Beginners can use [FAIR-Aware](https://fairaware.dans.knaw.nl/), and if you’re already more familiar you can try the [ARDC self assessment tool](https://ardc.edu.au/resources/aboutdata/fair-data/fair-self-assessment-tool/). 61 | 62 | ## References 63 | 64 | 1. http://doi.org/10.5334/dsj-2020-043 65 | 2. https://www.gida-global.org/care 66 | 3. https://doi.org/10.1038/s41467-018-05188-3 67 | 4. https://doi.org/10.1038/s41576-019-0161-z 68 | 5. https://doi.org/10.1080/15265161.2021.1891347 69 | 6. https://doi.org/10.1038/d41586-021-00758-w 70 | 7. https://doi.org/10.1038/sdata.2016.18 71 | 8. https://ec.europa.eu/research/participants/data/ref/h2020/grants_manual/hi/oa_pilot/h2020-hi-oa-data-mgt_en.pdf 72 | 9. https://www.go-fair.org/fair-principles/ 73 | 10. 
https://doi.org/10.5281/zenodo.6532282 74 | 75 | -------------------------------------------------------------------------------- /open-data/Lesson5-Planning.md: -------------------------------------------------------------------------------- 1 | # Lesson 5: Planning for Open Data 2 | 3 | ## Learning Objectives 4 | - Understand what the data life cycle is and how that affects the outlook on research. 5 | - Understand what a Data Management Plan (DMP) and metadata are. 6 | - Have an initial grounding on what communities to contact for support in this area. 7 | 8 | 9 | ## Introduction 10 | In the previous lessons it has been shown that effective open data needs to be managed. As we have seen this is not trivial and requires work and preparation. Correspondingly, there can be cost implications for your institutions to do this. Rather than facing these issues on an ad hoc basis, you should plan and prepare what you will need to do before you generate the data. With this in mind, we will 11 | 12 | * Discuss the data life cycle which places a focus on the reuse of data as it is generated. 13 | * Introduce the concept of a data management plan, where one documents the steps that will be carried out to ensure that your data can be shared in an appropriate fashion. 14 | * Introduce the concept of metadata, namely documenting your data which is essential if another researcher is to make use of your data. 15 | * Finally, discuss who to contact in terms of advice and support. 16 | 17 | ## 5.1 Planning 18 | ### The data life cycle 19 | With a focus on generating papers, researchers have implicitly ended up with the following workflow model of how they work with their data. 20 | It’s important to note here that because the focus is on the paper, there’s no thought to how the data changes at different stages of the process, or thought to how the data should be managed after a paper is published. Usually the data were included as part of the paper as a supplementary file. 
21 | 22 | This can be summarized in the following image. 23 | 24 | ![Linear workflow focussed on publications](https://github.com/learnopenscience/TOPS-OC2-data/blob/adb7137694dde403ca54c7b8f755e79dd60fe8d8/assets/Figure5.1.png "Figure 5.1 Linear workflow model") 25 | 26 | 27 | 28 | Figure 5.1: Linear workflow model 29 | 30 | On the other hand, if one thinks of open data that can be FAIR (and thus reused) then a different model emerges. In particular: 31 | Data needs to be available beyond the publication of a paper. 32 | Data no longer has to be associated with one paper. 33 | Data can be reanalysed. 34 | More data, from different sources or the same lab, can be added in at any time, including later. 35 | Instead of the process being a linear progression, with a start and a finish, the process for data becomes more complex and there is a cycle. 36 | These ideas were put together in the [DCC Curation Lifecycle model](http://www.ijdc.net/article/view/69) [[1](https://doi.org/10.2218/ijdc.v3i1.48)]. The original life cycle is complicated but a summary of the life cycle is shown below: 37 | 38 | ![The DataOne Data life cycle](https://old.dataone.org/sites/all/images/DLC2015_sm.png "Figure 5.2 A summary of the data life cycle") 39 | 40 | Figure 5.2: A summary of the data life cycle (reproduced from https://old.dataone.org/data-life-cycle) 41 | 42 | Here the focus is very much moved away from the idea of research -> publication and instead is on the data itself as a first-class research output. 43 | 44 | Let’s look at these individual steps: 45 | 46 | * **Plan**: a description of the data that will be compiled, how the data will be managed and made accessible throughout its lifetime. 47 | * **Collect**: this corresponds to the data gathering step (illustrated in Figure 5.1). It can include both primary (raw) and processed data. 48 | * **Assure**: the quality of the data is assured through checks and inspections. 
49 | * **Describe**: data is accurately and thoroughly described through documentation (e.g. metadata). 50 | * **Preserve**: these are the steps necessary to make sure that the data will be accessible going forward so in particular ensuring that the data is stored in a fashion that others can use it (in particular storing at a data repository). Ideally this should be done in a fashion that matches the CARE and FAIR principles (lesson 4). This may also include the step of removing data that may not be of use to future researchers. For example, high resolution images may no longer be themselves useful if in the analysis step one has extracted the features of interest from them. Not storing the high resolution image and simply storing the feature data would provide a considerable saving of storage. 51 | * **Discover**: here other researchers can extract either the entirety or some subset of the data for their own purposes. 52 | * **Integrate**: data from disparate sources are combined to form one homogeneous set of data that can be readily analyzed (this could include this one data set being analyzed). 53 | * **Analyze**: corresponds to the data analysis step as illustrated in Figure 5.1. 54 | There are a variety of different interpretations of the data life-cycle (see the reading list for this lesson) with varying degrees of complexity. It’s also important to note that this is an idealization of what goes in general. Nonetheless, it is important to think of all these steps as an ongoing, interactive process that requires thorough planning and continued consideration and to recognize that they are non-trivial to do. 55 | 56 | ## 5.2 Data Management Plans (DMP) 57 | Seeing as the above steps are not trivial before one begins to gather, collate or generate a data set it is useful to plan out what you will do with the data. This is referred to as a Data Management Plan or DMP for short. 
58 | 59 | A DMP means that you can think ahead of any particular issues that might crop up in terms of handling the data, such as the potential cost of storage, whether data needs to be anonymised and so on. 60 | 61 | A detailed description of what one should put into a DMP is described [here](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-dmp.html) [3]. As outlined in this [document from the UKRI](https://www.ukri.org/councils/stfc/guidance-for-applicants/what-to-include-in-your-proposal/data-management-plan/) [4], the central funder for the UK, these can include answering questions such as 62 | 63 | * What type of data will be generated or preserved? This could include data formats, rough estimates of the amount of data to be stored during a research project and similarly what will be preserved beyond the lifetime of the project? 64 | 65 | * What type of metadata will be used and preserved. It is worth noting that one of the more detailed aspects of the FAIR principles is to keep the metadata of the data set available even if the original data set no longer exists. 66 | 67 | * Where should the data be preserved? i.e. what repository will be used (repositories are discussed in the next lesson). How long should it be stored? (five years? ten years?) More concretely, data regulations can require that certain data be kept in certain ways for at least a certain amount of time. This will vary depending on the type of data (e.g. medical records, population statistics). It is advised that these expiration dates are explored in the literature, and/or policy guidelines. 68 | * How will any private data be stored so that it is kept securely? 69 | 70 | DMPs are not meant to be exhaustive documents! Typically they are 1-2 pages of A4 and often are less than a few thousand words. 
The important point is that they sketch out what a researcher or research team plans to do with their data well before they are gathered and can identify any steps that need to be taken rather than facing a major challenge now. 71 | 72 | DMPs are [increasingly used by funders](https://dmptool.org/public_templates) and their institutions as a means to have researchers map out what they will do with their data in a research proposal. Research proposals often require DMPs, and hence DMPs are often the ‘sharp end of the stick’ for researchers with respect to Open Science [5]. A good DMP is a criterion for assessment in grant applications and hence doing a good DMP will help your grant be funded. 73 | 74 | ## 5.3 Documenting your Data (Metadata) 75 | 76 | As discussed in the previous lessons, the FAIR principles emphasize the importance of metadata, namely documenting your data. Metadata is described in more detail [here](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-metadata.html) [6]. 77 | 78 | A perennial question is what type of metadata and description of the data should be provided for a data set. If you are dealing with electronic data should one provide metadata for a whole set of files, an individual file … each individual bit? 79 | 80 | The simplest rule of thumb is if there aren’t any guidelines for your type of data or domain repositories, then try and provide enough documentation about your data that you would ask for if you were downloading this data yourself. 81 | 82 | For example if this was data taken from a field trip where location is important then you might want to include longitudinal and latitudinal coordinates. If it’s data from a wet lab then it might include parameters you normally include in the materials and methods section of a paper. If it’s data from purely computational work you may want to list the software run and the parameters used. 83 | 84 | Data repositories will be discussed in the next lesson. 
Domain-specific repositories will often give more precise requirements on metadata (another reason to use them). 85 | 86 | If there are no guidelines then a simple README file attached to the data is a start (for an example see [here](https://cornell.app.box.com/v/ReadmeTemplate)) - though it’s important to note that ideally one should use a metadata schema, which is described in much more detail [here](https://www.dcc.ac.uk/guidance/standards), as FAIR data should be machine-actionable [7] [8]. 87 | 88 | ## 5.4 Help 89 | Much of the ins and outs of dealing with Open Data, or more particularly Open Data that follows good practice such as the FAIR principles, can be technical and lies beyond the domain of knowledge of researchers. How does one navigate this landscape? 90 | 91 | This can be summarized in the following diagram - 92 | 93 | ![Figure 5.3 Diagram pointing to four possible sources of information a researcher can approach.](https://github.com/learnopenscience/TOPS-OC2-data/blob/8509153045f69f2c52c6a6192c52476c54560071/lessons/Figure5.3.png "Figure 5.3 Sources of information and support on Open Data that a researcher could access.") 94 | 95 | Figure 5.3 Sources of information and support on Open Data that a researcher could access. 96 | 97 | ### **Research communities (international and national)** 98 | 99 | Individual research disciplines may already have put together materials and have advice on how to implement Open Science in their discipline. For example [FAIRsharing](https://fairsharing.org/) is an educational and information resource on data and metadata standards [9]. The [Research Data Alliance](https://rd-alliance.org/) has a variety of different [interest and working groups](https://www.rd-alliance.org/groups) in data sharing in specific disciplines [10] [11]. Scientific Societies and Publishers can also provide advice. 
100 | 101 | 102 | ### **Open Science related communities** 103 | 104 | There are a number of communities that are focussed on Open Science activities. [ReproducibiliTea](https://reproducibilitea.org/) is a grass-roots journal club initiative that is based in over 100 institutions and is a forum to discuss reproducibility, closely allied to Open Science [12]. The [FAIRdata forum](https://fairdataforum.org/) allows you to browse materials and raise questions that are related to FAIR [13]. Correspondingly the [PID forum](https://pidforum.org/) allows you to ask questions on PIDs in general [14]. A list of Open Science communities is provided in the next module (Open Tools). 105 | 106 | 107 | ### **Tools and resources** 108 | 109 | Finally, there are a range of different tools to help you. For example, [DMPtool](https://dmptool.org/quick_start_guide) and [DMPonline](https://dmponline.dcc.ac.uk/) allow you to build your own DMPs [15] [16]. See the module Open Tools for more details. There are a variety of different catalogs out there one can use to search for materials in this area. [Shanahan, Hoebelheinrich and Whyte](https://www.sciencedirect.com/science/article/pii/S2666389921001720) (2021) have a table of catalogs to search for materials [17]. 110 | 111 | 112 | ### **Local library or IT services** 113 | 114 | The long term vision is that Higher Education Institutions (HEIs) or Research Performing Organisations (RPOs) [will employ data professionals to advise and support researchers](http://insights.uksg.org/articles/10.1629/uksg.484/) [18]. These individuals have a variety of possible job titles such as Data Librarian, Data Steward, Data Curator and so on. These individuals would advise on aspects on how to make your data adhere to the CARE and FAIR principles, providing appropriate metadata and so on. 
Some HEIs/RPOs have already made Open Science (or Open Research) policy statements and may not yet have an infrastructure to help but will be interested in supporting you. In some countries there has been progress in this area but it is very early days. Nonetheless, it is worth contacting your University library as they may be able to advise you even on relatively small questions or requests. 115 | 116 | 117 | ## Summary 118 | Making data open is not trivial. It is not simply a matter of placing a data set onto a cloud drive. Nonetheless, if it is done correctly then the open data is available for reuse. Reuse can be by a completely different research team, or by the same research team that needs to carry on after a member of the team responsible for the data has moved on. This means one has to think of the data as part of a life cycle and that it is important to make plans (a Data Management Plan) prior to creating the data to ensure that it is stored appropriately. Part of making your data FAIR is to provide metadata that describes the data that you are depositing. Finally, do not feel that you have to do all this from scratch. There are a variety of different avenues that you can approach, either on an online basis or sometimes on your own campus. 119 | 120 | ## Assessment 121 | Think about the data sets that were described in lesson 1 as examples of good data. 122 | 123 | * Can you identify how the above steps were carried out with that data? 124 | 125 | Think now about a data set in your own discipline. 126 | 127 | * What would be the steps that you would need to take with that data to match up with the data life cycle? 128 | 129 | ## References 130 | 1. Higgins, S., “The DCC Curation Lifecycle model”, Intl. J. Digital Curation, **3** (1), 2008, DOI [10.2218/ijdc.v3i1.48](https://doi.org/10.2218/ijdc.v3i1.48) 131 | 2. [https://old.dataone.org/data-life-cycle](https://old.dataone.org/data-life-cycle) 132 | 3. 
[https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-dmp.html](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-dmp.html) 133 | 4. [https://www.ukri.org/councils/stfc/guidance-for-applicants/what-to-include-in-your-proposal/data-management-plan/](https://www.ukri.org/councils/stfc/guidance-for-applicants/what-to-include-in-your-proposal/data-management-plan/) 134 | 5. [https://dmptool.org/public_templates](https://dmptool.org/public_templates) 135 | 6. [https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-metadata.html](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-metadata.html) 136 | 7. [https://cornell.app.box.com/v/ReadmeTemplate](https://cornell.app.box.com/v/ReadmeTemplate) 137 | 8. [https://www.dcc.ac.uk/guidance/standards](https://www.dcc.ac.uk/guidance/standards) 138 | 9. [https://fairsharing.org/](https://fairsharing.org/) 139 | 10. [https://www.rd-alliance.org/](https://www.rd-alliance.org/) 140 | 11. [https://www.rd-alliance.org/groups](https://www.rd-alliance.org/groups) 141 | 12. [https://reproducibilitea.org/](https://reproducibilitea.org/) 142 | 13. [https://fairdataforum.org/](https://fairdataforum.org/) 143 | 14. [https://pidforum.org/](https://pidforum.org/) 144 | 15. [https://dmptool.org/quick_start_guide](https://dmptool.org/quick_start_guide) 145 | 16. [https://dmponline.dcc.ac.uk/](https://dmponline.dcc.ac.uk/) 146 | 17. Shanahan, H., Hoebelheinrich, N., & Whyte, A. (2021). Progress toward a comprehensive teaching approach to the FAIR data principles. _Patterns_, _2_(10), 100324. [https://doi.org/10.1016/j.patter.2021.100324](https://doi.org/10.1016/j.patter.2021.100324) 147 | 18. Plomp, E., Dintzner, N., Teperek, M. & Dunning, A., (2019). 
“Cultural obstacles to research data management and sharing at TU Delft”, _Insights_, **32**(1), [http://doi.org/10.1629/uksg.484](http://doi.org/10.1629/uksg.484) 148 | -------------------------------------------------------------------------------- /open-data/README.md: -------------------------------------------------------------------------------- 1 | # Open Data: Introduction 2 | 3 | By the end of this module you will be able to describe what Data and Open Data are; explain why Open Data is an enabler of Open Science and why making data open brings huge benefits; demonstrate that, while the default for data should be open, this should not be done in all circumstances and data should be made open in a responsible fashion; explain the CARE and FAIR principles, the former being principles associated with indigenous data governance and the latter a set of principles for sharing data effectively; and describe how data should be managed so that making the data open can be done as effectively as possible. Finally, you will be able to outline practices for Open Data including repositories, licenses and citing data. 
4 | -------------------------------------------------------------------------------- /open-data/contributors.md: -------------------------------------------------------------------------------- 1 | #### Jannatul Ferdish 2 | https://github.com/Jannatul-Ferdush 3 | 4 | #### Siobhan Hall 5 | https://github.com/smhall97 6 | 7 | https://twitter.com/smhall97 8 | 9 | #### Pauline Karega 10 | https://orcid.org/0000-0001-7974-048X 11 | 12 | https://github.com/karegapauline 13 | 14 | https://twitter.com/KaregaP 15 | 16 | #### Steven Klusza 17 | https://github.com/smklusza 18 | 19 | #### Andrea Medina-Smith 20 | https://orcid.org/0000-0002-1217-701X 21 | 22 | https://github.com/andreamedinasmith 23 | 24 | #### Esther Plomp 25 | https://orcid.org/0000-0003-3625-1357 26 | 27 | https://github.com/EstherPlomp 28 | 29 | https://twitter.com/PhDToothFAIRy 30 | 31 | #### Yuhan (Douglas) Rao 32 | https://orcid.org/0000-0001-6850-3403 33 | 34 | [https://github.com/geo-yrao](https://github.com/geo-yrao) 35 | 36 | [https://twitter.com/douglas_rao](https://twitter.com/douglas_rao) 37 | 38 | #### Hugh Shanahan 39 | https://orcid.org/0000-0003-1374-6015 40 | 41 | http://www.shanahanlab.org/ 42 | 43 | -------------------------------------------------------------------------------- /open-data/definitions.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open-data/docs/placeholder.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open-data/img/OpenSciency_Twitter_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-data/img/OpenSciency_Twitter_banner.png 
-------------------------------------------------------------------------------- /open-data/img/OpenSciency_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-data/img/OpenSciency_logo.png -------------------------------------------------------------------------------- /open-data/learning-objectives.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open-data/references.bib: -------------------------------------------------------------------------------- 1 | %% This BibTeX bibliography file was created using BibDesk. 2 | %% https://bibdesk.sourceforge.io/ 3 | 4 | %% Created for Lorena Barba at 2022-06-13 10:28:22 +0200 5 | 6 | 7 | %% Saved with string encoding Unicode (UTF-8) 8 | 9 | 10 | 11 | @article{agarwal2021retrieval, 12 | author = {Agarwal, Pooja K. and Nunes, Ludmila D. and Blunt, Janell R.}, 13 | date-added = {2022-06-13 10:08:32 +0200}, 14 | date-modified = {2022-06-13 10:28:22 +0200}, 15 | doi = {10.1007/s10648-021-09595-9}, 16 | journal = {Educational Psychology Review}, 17 | number = {4}, 18 | pages = {1409--1453}, 19 | publisher = {Springer}, 20 | title = {Retrieval practice consistently benefits student learning: A systematic review of applied research in schools and classrooms}, 21 | volume = {33}, 22 | year = {2021}} 23 | 24 | @misc{polka_jessica_k_2020_3955154, 25 | author = {Polka, Jessica K. 
and Penfold, Naomi C.}, 26 | date-added = {2022-06-11 18:04:11 +0200}, 27 | date-modified = {2022-06-12 09:02:26 +0200}, 28 | doi = {10.5281/zenodo.3955154}, 29 | howpublished = {Zenodo data set}, 30 | month = jul, 31 | publisher = {Zenodo}, 32 | title = {{Biomedical preprints per month, by source and as a fraction of total literature}}, 33 | version = {4.0}, 34 | year = 2020, 35 | bdsk-url-1 = {https://doi.org/10.5281/zenodo.3955154}} 36 | -------------------------------------------------------------------------------- /open-data/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | ```{bibliography} 4 | :style: alpha 5 | ``` 6 | 7 | ## Lesson 1 References: 8 | 1. https://www.openstreetmap.us/ 9 | 2. https://keydifferences.com/difference-between-ungrouped-data-and-grouped-data.html 10 | 3. https://www.datamation.com/big-data/structured-vs-unstructured-data/ 11 | 4. https://eospso.gsfc.nasa.gov 12 | 5. https://www.rd-alliance.org/groups/physical-samples-and-collections-research-data-ecosystem-ig 13 | 6. https://www.igsn.org/ 14 | 7. https://isamplesorg.github.io/home/ 15 | 8. https://zenodo.org/record/6818076#.YtgQhITMK3B 16 | 9. https://www.wcrp-climate.org/wgcm-cmip 17 | 10. https://www.climateurope.eu/a-short-introduction-to-climate-models-cmip-cmip6/ 18 | 11. https://www.sciencedirect.com/science/article/abs/pii/S0023969001910987?via%3Dihub 19 | 12. https://the-turing-way.netlify.app/reproducible-research/reproducible-research.html 20 | 13. https://www.paristechreview.com/2013/03/29/brief-history-open-data/ 21 | 14. https://opendatahandbook.org/ 22 | 15. https://okfn.org/ 23 | 16. https://doi.org/10.1093/bioinformatics/btn464 24 | 17. https://data.worldbank.org/indicator/SI.POV.DDAY?locations=1W&start=1981&end=2015&view=chart 25 | 26 | ## Lesson 2 References: 27 | 1. https://eventhorizontelescope.org/ 28 | 2. https://oceanrep.geomar.de/id/eprint/12875/1/CMIP.pdf 29 | 3. 
https://doi.org/10.1002/epi4.12359 30 | 4. https://www.openstreetmap.org/#map=5/54.910/-3.432 31 | 5. https://www.nature.com/articles/d41586-021-00305-7#:~:text=Other%20researchers%20say%20that%20restrictions,while%20protecting%20data%20providers 32 | 6. https://www.nature.com/articles/d41586-020-01246-3 33 | 7. https://www.oed.com/view/Entry/33513?redirectedFrom=citizen+scientist#eid316597459 34 | 8. https://en.unesco.org/science-sustainable-future/open-science/recommendation 35 | 9. https://ecsa.citizen-science.net/ 36 | 10. https://www.idrc.ca/en/book/contextualizing-openness-situating-open-science 37 | 11. https://zenodo.org/record/3946773#.YsFyqHbMJPb 38 | 12. https://www.nature.com/articles/d41586-021-01194-6 39 | 40 | ## Lesson 3 References: 41 | 1. https://researchsupport.admin.ox.ac.uk/governance/ethics/resources/consent#:~:text=Informed%20consent%20is%20one%20of,before%20they%20enter%20the%20research. 42 | 2. https://www.internationalgenome.org/sites/1000genomes.org/files/docs/Informed%20Consent%20Form%20Template.pdf 43 | 3. https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-personal.html 44 | 4. https://doi.org/10.1038/s41559-018-0608-1 45 | 5. https://doi.org/10.1126/science.aan1362 46 | 47 | ## Lesson 4 References: 48 | 1. http://doi.org/10.5334/dsj-2020-043 49 | 2. https://www.gida-global.org/care 50 | 3. https://doi.org/10.1038/s41467-018-05188-3 51 | 4. https://www.nature.com/articles/s41576-019-0161-z 52 | 5. https://doi.org/10.1080/15265161.2021.1891347 53 | 6. https://doi.org/10.1038/d41586-021-00758-w 54 | 7. https://doi.org/10.1038/sdata.2016.18 55 | 8. https://ec.europa.eu/research/participants/data/ref/h2020/grants_manual/hi/oa_pilot/h2020-hi-oa-data-mgt_en.pdf 56 | 9. https://www.go-fair.org/fair-principles/ 57 | 10. https://doi.org/10.5281/zenodo.6532282 58 | 59 | ## Lesson 5 References: 60 | 1. Higgins, S. ,”The DCC Curation Lifecycle model”, Intl. J. 
Digital Curation, **3** (1), 2008, DOI [10.2218/ijdc.v3i1.48](https://doi.org/10.2218/ijdc.v3i1.48) 61 | 2. [https://old.dataone.org/data-life-cycle](https://old.dataone.org/data-life-cycle) 62 | 3. [https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-dmp.html](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-dmp.html) 63 | 4. [https://www.ukri.org/councils/stfc/guidance-for-applicants/what-to-include-in-your-proposal/data-management-plan/](https://www.ukri.org/councils/stfc/guidance-for-applicants/what-to-include-in-your-proposal/data-management-plan/) 64 | 5. [https://dmptool.org/public_templates](https://dmptool.org/public_templates) 65 | 6. [https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-metadata.html](https://the-turing-way.netlify.app/reproducible-research/rdm/rdm-metadata.html) 66 | 7. [https://cornell.app.box.com/v/ReadmeTemplate](https://cornell.app.box.com/v/ReadmeTemplate) 67 | 8. [https://www.dcc.ac.uk/guidance/standards](https://www.dcc.ac.uk/guidance/standards) 68 | 9. [https://fairsharing.org/](https://fairsharing.org/) 69 | 10. [https://www.rd-alliance.org/](https://www.rd-alliance.org/) 70 | 11. [https://www.rd-alliance.org/groups](https://www.rd-alliance.org/groups) 71 | 12. [https://reproducibilitea.org/](https://reproducibilitea.org/) 72 | 13. [https://fairdataforum.org/](https://fairdataforum.org/) 73 | 14. [https://pidforum.org/](https://pidforum.org/) 74 | 15. [https://dmptool.org/quick_start_guide](https://dmptool.org/quick_start_guide) 75 | 16. [https://dmponline.dcc.ac.uk/](https://dmponline.dcc.ac.uk/) 76 | 17. Shanahan, H., Hoebelheinrich, N., & Whyte, A. (2021). Progress toward a comprehensive teaching approach to the FAIR data principles. _Patterns_, _2_(10), 100324. [https://doi.org/10.1016/j.patter.2021.100324](https://doi.org/10.1016/j.patter.2021.100324) 77 | 18. Plomp, E., Dintzner, N., Teperek, M. & Dunning, A., (2019). 
“Cultural obstacles to research data management and sharing at TU Delft”, _Insights_, **32**(1), [http://doi.org/10.1629/uksg.484](http://doi.org/10.1629/uksg.484) 78 | 79 | ## Lesson 6 references: 80 | 1. https://doi.org/10.5281/zenodo.6532282 81 | 2. https://grants.nih.gov/grants/guide/notice-files/NOT-OD-21-013.html 82 | 3. https://doi.org/10.3389/fdata.2019.00043 83 | 4. https://opendatacommons.org/licenses/ 84 | 5. https://creativecommons.org/about/program-areas/open-data/ 85 | 6. https://resources.data.gov/open-licenses/ 86 | 7. https://creativecommons.org/publicdomain/zero/1.0/ 87 | 8. https://senselab.med.yale.edu/ModelDB/ 88 | 9. https://jie.yale.edu/datastatements 89 | 10. https://think.f1000research.com/wp-content/uploads/2021/02/F1000Research-Open-Data-How-to-Write-a-Data-Availability-Statement.pdf 90 | 11. https://doi.org/10.5281/zenodo.6505587 91 | -------------------------------------------------------------------------------- /open-results/CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | __Batalha, Natasha__ 2 | NASA Ames Research Center 3 | [https://orcid.org/0000-0003-1240-6844](https://orcid.org/0000-0003-1240-6844) 4 | [https://github.com/natashabatalha](https://github.com/natashabatalha) 5 | [https://twitter.com/natashabatalha](https://twitter.com/natashabatalha) 6 | 7 | __Camacho Toro, Reina__ 8 | CERN/CNRS, LA-CoNGA physics 9 | [https://orcid.org/0000-0002-9192-8028](https://orcid.org/0000-0002-9192-8028) 10 | [https://github.com/camachoreina](https://github.com/camachoreina) 11 | [https://twitter.com/rcamachotoro](https://twitter.com/rcamachotoro) 12 | 13 | __Campitelli, Elio__ 14 | University of Buenos Aires 15 | [https://orcid.org/0000-0002-7742-9230](https://orcid.org/0000-0002-7742-9230) 16 | [https://github.com/eliocamp](https://github.com/eliocamp) 17 | [https://mastodon.social/@eliocamp](https://mastodon.social/@eliocamp) 18 | 19 | __Dunleavy, Daniel__ 20 | Florida State University 
21 | [https://orcid.org/0000-0002-3597-7714](https://orcid.org/0000-0002-3597-7714) 22 | [https://github.com/dunldj](https://github.com/dunldj) 23 | [https://twitter.com/Dunleavy_Daniel](https://twitter.com/Dunleavy_Daniel) 24 | 25 | __Erdmann, Christopher__ 26 | Michael J. Fox Foundation 27 | [https://orcid.org/0000-0003-2554-180X](https://orcid.org/0000-0003-2554-180X) 28 | [https://github.com/libcce](https://github.com/libcce) 29 | [https://twitter.com/libcce](https://twitter.com/libcce) 30 | 31 | __Fouilloux, Anne__ 32 | University of Oslo, Norway 33 | [https://orcid.org/0000-0002-1784-2920](https://orcid.org/0000-0002-1784-2920) 34 | [https://github.com/annefou](https://github.com/annefou) 35 | 36 | __Lacerda, Michel__ 37 | Georgia Institute of Technology 38 | [https://orcid.org/0000-0002-8433-6964](https://orcid.org/0000-0002-8433-6964) 39 | [https://github.com/michelusp](https://github.com/michelusp) 40 | 41 | __Saderi, Daniela__ 42 | PREreview, Code for Science & Society 43 | [https://orcid.org/0000-0002-6109-0367](https://orcid.org/0000-0002-6109-0367) 44 | [https://github.com/dasaderi](https://github.com/dasaderi) 45 | [https://twitter.com/Neurosarda](https://twitter.com/Neurosarda) 46 | 47 | __Sharan, Malvika__ 48 | The Alan Turing Institute and Open Life Sciences 49 | [https://orcid.org/0000-0001-6619-7369](https://orcid.org/0000-0001-6619-7369) 50 | [https://github.com/malvikasharan](https://github.com/malvikasharan) 51 | [https://twitter.com/malvikasharan](https://twitter.com/malvikasharan) 52 | 53 | -------------------------------------------------------------------------------- /open-results/README.md: -------------------------------------------------------------------------------- 1 | # Open Results Training Module 2 | 3 | Welcome to the Open Results Module! 4 | 5 | Recap: In Open Ethos, we learned about the ethics and principles underlying responsible open science practices. 
In Open Software, we explored and identified the right tools and methods that allow us to ensure reproducibility through version control, code testing, workflow, and a virtual research environment. In Open Data, we developed a data management plan that can ensure the Findability, Accessibility, Interoperability and Reusability (FAIR) of our data throughout the research process, and not just at the end when the final report from the project is released. 6 | 7 | In this module, we will explore the different stages of the research process—including identifying the different types of Research Objects in a study and the various ways in which they can be shared and disseminated as open results. We will define a Research Object and provide an overview of how they relate to the research lifecycle (Lesson 1). Specifically, we will discuss the different stages of the research process, from ideation and planning all the way through and beyond dissemination. Then, we will consider how these Research Objects can be shared (Lessons 2-3). By the end of the module, we will have looked at the important concepts and practices for publishing and sharing research components before, during and after the project. Lastly, we address ethical contributorship – making sure collaboration is fair and inclusive, and that credit is assigned transparently and equitably (Lesson 4). 8 | 9 | ## Objectives: 10 | 11 | 1. Identify research stages and elements of research objects that can be considered results 12 | 2. Identify the guiding practices and principles related to open results and the advantages of implementing them across stages of a research process 13 | 3. Identify paths for publicly communicating results 14 | 4. Create open results contributor guidelines and opportunities for open and equitable collaborations 15 | 5. Give credit to contributors in open results 16 | 6. Contribute and provide constructive feedback to others' results 17 | 7. 
Apply open result principles to new and ongoing research projects 18 | 19 | ## Overview and key messages 20 | 21 | This module addresses different questions discussed systematically across the following four lessons: 22 | 23 | Lesson 1: The Research Process and Its Results 24 | 25 | 1. What are the different stages of the research process? 26 | 2. What are "Research Objects"? 27 | 28 | Lesson 2: Results in the Context of Open Science 29 | 30 | 1. What are the advantages of making results open throughout the research process? 31 | 2. What resources are available to help make results open? 32 | 3. What are the guiding principles to turn a research result into an open result? 33 | 34 | Lesson 3: Applying Open Result Framework to your Research 35 | 36 | 1. How can you apply an open framework across different research objects? 37 | 2. How can you share your results, and select **tools** that support open science? 38 | 3. Using a checklist to achieve open results 39 | 40 | Lesson 4: Providing Equitable Opportunities and Credit for Contributors to Results 41 | 42 | 1. How can you define contributors to each digital research object and determine their suitable form of recognition? 43 | 2. How can you create contributor guidelines that ensure equity, access, inclusion, and diversity? 44 | 3. How can you ensure your open results are properly attributed and cited by others? 45 | -------------------------------------------------------------------------------- /open-results/figures/README.md: -------------------------------------------------------------------------------- 1 | All images used in the open results module have been stored here. 
2 | -------------------------------------------------------------------------------- /open-results/figures/acknowledgement.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-results/figures/acknowledgement.jpg -------------------------------------------------------------------------------- /open-results/figures/authorship-guide.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-results/figures/authorship-guide.jpg -------------------------------------------------------------------------------- /open-results/figures/benefits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-results/figures/benefits.png -------------------------------------------------------------------------------- /open-results/figures/research-cycle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-results/figures/research-cycle.jpg -------------------------------------------------------------------------------- /open-results/figures/research-object.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensciency/sprint-content/e94c99f712b75ffbeb6c6b7ea0b72b90a9c6df93/open-results/figures/research-object.jpg -------------------------------------------------------------------------------- /open-results/files/placeholder.md: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /open-results/lesson-1-research-process-and-results.md: -------------------------------------------------------------------------------- 1 | # Lesson 1: The Research Process and Its Results 2 | 3 | ## Introduction 4 | 5 | With the overarching goal of maintaining research integrity and ethical practices from the start, we need to consider reproducibility methods, collaborative approaches and transparent reporting for the research teams to ensure that all results can be replicated, validated, and built upon by other independent researchers. As researchers, this means: 1) broadening our perspectives regarding what shareable research outputs are produced throughout the research process, 2) providing sufficient documentation that describes the research workflow and the decision-making process, and 3) publishing all research outputs that would eventually enable others to validate the research findings. 6 | 7 | Before we can begin to do that, we need to define what we mean by the research process, and what we consider research outputs at various stages of our research. Accordingly, this lesson will enable you to answer two questions: 8 | 9 | 1. What are the different stages of the research process? 10 | 2. What research objects can be considered a result? 11 | 12 | ## What is a research object? 13 | 14 | A **Research Object (RO)** is a method for the identification, aggregation and exchange of scholarly information on the Web [[Garcia-Silva et al. 2019](https://www.sciencedirect.com/science/article/abs/pii/S0167739X18314638)]. RO can be composed of both research data and digital research objects that are defined as follows by Organisation for Economic Co-operation and Development ([OECD Legal Instruments](https://legalinstruments.oecd.org/en/instruments/OECD-LEGAL-0347)). 
15 | 16 | **Research data** consists of "_factual records (such as numerical scores, textual records, images, and sounds) resulting from research that is partially or fully funded by public funds, used as primary sources for scientific research, and that are commonly accepted in the scientific community as necessary to validate research findings._" 17 | 18 | A **research-relevant "digital" research object** consists of any "_metadata, algorithms, workflows, models, and software (including code) resulting from research that is partially or fully funded by public funds, which are used in a research and development context._" 19 | 20 | Research Objects are often given an identifier. In this way, there is a mechanism to trace back related resources about a scientific investigation. The most important aspects to consider about ROs: 21 | 22 | - They are not only associated with the end products as publications and final reports but also encompass research outputs created, revised and shared throughout the research lifecycle that help validate findings claimed in scholarly publications. More simply, ROs apply to any "single information unit" or research material that can be **shared and cited** with other scientists within and outside the project. 23 | - Motivation behind RO is the need to identify and share all components such as data, source code, tools, and method documentation, as well as communication materials such as presentations, videos, blogs and other tangible outcomes. 24 | - ROs facilitate reproducibility and reuse of the scientific methods and results through access to resources, context and metadata 25 | - ROs help us to understand the entire research lifecycle through research outcomes including publications shared progressively. They also allow us to track the versioning and development of the entire project. 
26 | 27 | Ultimately, there are three guiding principles for ROs [[reference](https://the-turing-way.netlify.app/communication/research-objects.html)]: 28 | 29 | 1. Digital identity - Using unique identifiers, such as DOIs (link to data) for tangible outcomes such as publications or data, and ORCID ids for researchers (explained in detail in the next lesson). This enables others to cite and use individual components of your work. 30 | 2. Data aggregation - Using a method to aggregate all outcomes so that they are discoverable and hence allow anyone to investigate and reproduce the research. 31 | 3. Annotation - Use rich machine-readable metadata (discussed in open data) that help ensure the findability and accessibility of all scientific work. 32 | 33 | ![This image shows how research objects evolve and grow in content during the collaboration process and how new research objects can be derived from existing ones.](figures/research-object.jpg) 34 | 35 | Figure 1: _Research Objects allow working open by design and share during the research process and not only the research outputs at the end. The Turing Way project illustration by Scriberia. Used under a CC-BY 4.0 licence. DOI:_[_10.5281/zenodo.3332807_](https://doi.org/10.5281/zenodo.3332807)_._ 36 | 37 | Following from these we can now build a definition for an **Open Result.** 38 | 39 | An **Open Result** is any research outcome that is recorded in an RO and shared openly. This includes successful products, reports on potential risks, experiments that worked as well as those that failed, and any other information such as experimental protocols and standards, together with a record of all the individuals who contributed to the research. 40 | 41 | ## What are the different stages of the research process? 42 | 43 | In previous modules, we have learned the fundamentals and practical concepts for planning our research for open science. 
Specifically, in the Ethos of Open Science [addlink-ethos] module, we learned that open science should be considered throughout the research process, and not just at the time of publication. With this understanding, when considering shareable research outputs, it is important to think about the entire research life cycle – different tasks carried out during the life cycle of a research project. 44 | 45 | Many of us might be very familiar with the research life cycle but may not have considered what results could be shared openly throughout the process. 46 | 47 | ![The research process is represented as a perpetual cycle of generating research ideas, performing data planning and design, data collection, and data processing and analysis, publishing, preserving and hence, allowing re-use of data.](figures/research-cycle.jpg) 48 | 49 | Figure 2: _The Turing Way_ project illustration by Scriberia. Used under a CC-BY 4.0 licence. DOI:[10.5281/zenodo.3332807](https://doi.org/10.5281/zenodo.3332807). 50 | 51 | There are many ways to describe a research life cycle, but in this lesson, we define it in _nine_ distinct phases based on Figure 2 [_The Turing Way_](https://the-turing-way.netlify.app/reproducible-research/overview.html) that builds on various published examples. 52 | 53 | ### Conceptualization/Ideation 54 | 55 | In this stage, we focus on outlining and describing the research idea to different collaborators, students and/or postdocs. This could also encompass proposal writing, obtaining ethics approval documents, and/or securing funding. 56 | 57 | ### Planning 58 | 59 | In this stage, we are thinking about project management and workflows. Who is needed for the research project to be successful? During the planning phase, collaborations are often extended beyond close collaborators. Methods of collaboration are often defined, including team member roles and responsibilities. 
60 | 61 | ### Project Design 62 | 63 | In this stage, we are concerned with describing the research protocols. For example, what the research hypothesis is, what protocols will be used to conduct the study, how the data will be collected and processed, where it will be stored, and more. 64 | 65 | ### Data Collection 66 | 67 | In this stage, data collection (from publicly available databases or resources) or data generation (through experiments or quantitative/qualitative studies) commences. See Open Data [addlink-data]. 68 | 69 | ### Data Wrangling and Processing 70 | 71 | At this stage, we use existing software or write custom code to process the data that has been collected. See Open Software [addlink-software]. 72 | 73 | ### Data Exploration and Statistical Analysis 74 | 75 | At this stage, we combine the workflows from Stages 4 and 5 and begin using our tools, code or software to analyse the data that has been obtained. 76 | 77 | ### Reporting 78 | 79 | Here we report on our findings, in other words, we share them with the research community. This can be done in the form of a research manuscript first published on a preprint server and in a peer-reviewed journal. However, reporting now far exceeds publication alone. Reporting also encompasses presentation materials (such as posters, and slide decks), lab websites or blogs, outreach materials for social media, podcasts or press releases, and many more. 80 | 81 | ### Preservation and Reuse 82 | 83 | In this stage, we consider archiving all outcomes for long-term preservation. This ensures that our research is accessible, and reusable, meaning that someone else can go through this whole process of reproducing or building upon our work. 84 | 85 | ### Scientific Engagement, Training, and Feedback (cross-cutting) 86 | 87 | In this stage, we conduct effective collaboration through active engagement, skill development and peer-review processes for both direct and indirect stakeholders of our research. 
88 | 89 | **Important note:** Although we describe these stages in sequential order, these stages may not always be linear. For instance, scientific engagement and data management efforts will be applied at all stages of research. Data exploration, analysis and reporting will be an iterative process, and reporting will happen at different points of the research lifecycle. Even before the study begins, research questions, hypotheses, and planned approaches may be openly reported or preregistered [[Nosek et al. 2018](https://www.pnas.org/doi/10.1073/pnas.1708274114)]. Preregistration distinguishes predictions, which are made before data collection, from postdictions, which are made once the results of the data are obtained. 90 | 91 | To build high-quality research outcomes, it is essential that everyone (1) can work together efficiently at all stages of the project, (2) has a shared understanding of how results from their work will be shared with each other, and more broadly beyond the project, and (3) gets fairly recognized for all their contributions. 92 | 93 | ## What research objects are commonly associated with research stages? 94 | 95 | Now that we understand the different stages of the research lifecycle, Research Objects and open results, 96 | we can expand on how they operate in the context of the research lifecycle. The most important outcome to 97 | consider is that these ROs can be produced throughout the research lifecycle and should be published throughout, 98 | rather than at the end of the research process. 
99 | 100 | ### Research stages and open result table 101 | 102 | | **Research Stages** | **Possible research objects as open results** | 103 | | --- | --- | 104 | | Conceptualization and planning | Proposal, ethics approval document, budget/funding plan, contributor and partnership plans (see lesson 4 [addlink]), preregistration reports, research materials, research protocol | 105 | | Project design | Versioning system, shared project repository, project planning document (project goals, roadmap, ways of working, roles and responsibilities, communication), hypothesis and pre-registration, collaboration plan, Equity, Diversity, Inclusion and Accessibility (EDIA) guidelines, data management plan, metadata standards, governance plan, data safety and security guide | 106 | | Data collection | File formats and data types, parameters/dimension, test data, metadata, data access plan/details, raw data | 107 | | Data wrangling and processing | Statistical methods, tools, workflow and analysis pipeline, processed data, code for data exploration, statistical results | 108 | | Data exploration, statistical analysis | Notebooks, figures, code, software package (R package, python library), code documentation, models, technical reports on scope and limitation of data, configuration and virtual research environment | 109 | | Engagement, training, and feedback from peers (communications and collaboration) | Contribution guideline (feedback documents, process for inviting feedback), review sprint plan and outcomes, departmental and conference talks, user testing information, tutorials, executable notebooks, videos | 110 | | Preservation and reuse (Research Data Management) | Data management plan with the versioning system, metadata standards, data governance and archiving plans, data sharing and archiving information, code packages, virtual research environments, hardware (if produced), physical samples | 111 | | Reporting, publication | Posters/figures, talks/slides, preprints, 
journal/book publications, layman summary, lab website/blogs, outreach materials for social media, podcast/press release, containers for testing (Docker, Binder), documentation and manuals, research compendia, configuration files (for reproducibility), software release information, hardware plan and associated documentation | 112 | 113 | ### Contributions that are not Research Objects but should be considered as results and recorded openly 114 | 115 | Research, like most technical professions, involves different kinds of contributions that do not always result in tangible outcomes and hence, can't always be defined by RO. For example, responsibilities associated with maintenance of RO, community management, data stewardship, library and archiving work, "Equity, Diversity, Inclusion and Accessibility" (EDIA) efforts, as well as tasks associated with funding, project management, scientific event organization, training activities and more. Outcomes from these roles cannot always be accurately captured besides documenting their processes, methods and impact, often recorded by some people involved in those roles. In Lesson 4, we discuss how to properly acknowledge the contributors to your results. 116 | 117 | ## Assessment #1: Identify the research objects in your project or a case study 118 | 119 | Invite project ideas from the learners and the broader open science community before delivering the training. 120 | 121 | ## Self-assessment #2: Identify the research objects to be shared as open results of a project you are/were involved in 122 | 123 | Provide an empty version of the "research stages and open result table" table to be filled by the learners. 124 | 125 | ## Conclusion 126 | 127 | The research consists of many different stages, each with several important tasks. In the early stages, we deal with Conceptualization and Planning. 
This can include a number of different things - depending on the project - but typically involves the development of a study protocol, research questions, and other study materials. Next, comes Project Design. In this stage, we often focus on developing a study timeline (or roadmap), assigning different roles to project team members, creating data and metadata management plans, and planning for data collection, management, and security. Next, is the active responsibility for Data Collection. Taking a step back from a project can help us establish an understanding of this multifaceted process and give us an appreciation of all the important elements (and people) involved in bringing a project or study from conceptualization through to completion and dissemination. In the next lesson, we will consider the advantages - for ourselves and the broader scientific community - of making our results open and transparent. In doing so, we will explore best practices for transforming our work from closed to open. 128 | 129 | ## References 130 | 131 | 1. The Turing Way Chapters: Guide for Reproducible Research and Research Object to capture the Research Life Cycle, [https://the-turing-way.netlify.app/welcome.html](https://the-turing-way.netlify.app/welcome.html), The Turing Way Community, Zenodo, 27 July 2022, doi:10.5281/zenodo.6909298. 132 | 2. Garcia-Silva, Andres, et al. "Enabling FAIR research in Earth Science through research objects." Future Generation Computer Systems, vol. 98, 1 Sept. 2019, pp. 550-64, doi:10.1016/j.future.2019.03.046. 133 | 3. "OECD Legal Instruments." 25 Aug. 2022, legalinstruments.oecd.org/en/instruments/OECD-LEGAL-0347. 134 | 4. Nosek, Brian A., et al. "The preregistration revolution." Proceedings of the National Academy of Sciences, vol. 115, no. 11, 13 Mar. 2018, pp. 2600-2606, doi:10.1073/pnas.1708274114. 
135 | -------------------------------------------------------------------------------- /open-results/lesson1-brief-heading.md: -------------------------------------------------------------------------------- 1 | - General points on structure 2 | - Break up each lesson into a separate markdown document 3 | - Learning objective(s) listed at the top of each lesson file 4 | - References at the end of each lesson file 5 | - Cross-referencing between lessons on different modules 6 | - : - 7 | - Single “files” directory for additional figures etc. 8 | - Summary rather than conclusions 9 | - Label each section of lesson 2.1, 2.2, etc. 10 | - Contributor list, like FAIR cookbook, but then without roles and alphabetical 11 | -------------------------------------------------------------------------------- /open-software/CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | __Bayer, Johanna__ 2 | University of Melbourne 3 | [https://orcid.org/0000-0003-4891-6256](https://orcid.org/0000-0003-4891-6256) 4 | [https://github.com/likeajumprope](https://github.com/likeajumprope) 5 | [https://twitter.com/likeajumprope](https://twitter.com/likeajumprope) 6 | 7 | __Brown, Sierra__ 8 | Million Concepts, LLC 9 | [https://orcid.org/0000-0001-6065-5461](https://orcid.org/0000-0001-6065-5461) 10 | [https://github.com/Sierra-MC](https://github.com/Sierra-MC) 11 | 12 | __Chegini, Taher__ 13 | University of Houston 14 | [https://orcid.org/0000-0002-5430-6000](https://orcid.org/0000-0002-5430-6000) 15 | [https://github.com/cheginit](https://github.com/cheginit) 16 | [https://twitter.com/_taher_](https://twitter.com/_taher_) 17 | 18 | __Keat, Yeo__ 19 | University of Putra Malaysia 20 | [https://orcid.org/0000-0001-6935-3101](https://orcid.org/0000-0001-6935-3101) 21 | [https://github.com/ee2110](https://github.com/ee2110) 22 | [https://twitter.com/EeYeoKeat](https://twitter.com/EeYeoKeat) 23 | 24 | __Onabajo, Babatunde__ 25 | ChurchMapped Limited 
26 | [https://orcid.org/0000-0001-6118-9255](https://orcid.org/0000-0001-6118-9255) 27 | [https://github.com/BabatundeOnabajo](https://github.com/BabatundeOnabajo) 28 | [https://twitter.com/babatundeonabaj](https://twitter.com/babatundeonabaj) 29 | 30 | __Powell, James__ 31 | Don’t Use This Code 32 | ORCID 33 | [https://github.com/dutc](https://github.com/dutc) 34 | 35 | [https://twitter.com/dontusethiscode](https://twitter.com/dontusethiscode) 36 | 37 | __Riddell, Cameron__ 38 | Don’t Use This Code 39 | ORCID 40 | [https://github.com/CRiddler](https://github.com/CRiddler) 41 | 42 | [https://mobile.twitter.com/riddlemecam](https://mobile.twitter.com/riddlemecam) 43 | 44 | __Vaz, Ana__ 45 | University of Miami 46 | [https://orcid.org/0000-0003-0336-5227](https://orcid.org/0000-0003-0336-5227) 47 | [https://github.com/AnaVaz-NOAA](https://github.com/AnaVaz-NOAA) 48 | [https://github.com/anacarolvaz](https://github.com/anacarolvaz) 49 | [https://twitter.com/anacarolvaz](https://twitter.com/anacarolvaz) 50 | 51 | -------------------------------------------------------------------------------- /open-software/README.md: -------------------------------------------------------------------------------- 1 | # Open Software: Preamble 2 | 3 | Have you ever marveled at mesmerizing scientific visualizations and wondered how they were generated and whether you can recreate them or even maybe tweak them to produce new results? These types of images have been created by researchers using **research software**. These software products and sometimes their **source codes** are freely available to the public. Reproducing such results and using them to advance the knowledge produced by these types of research software products are among the pillars of open science. For example, Figure 1, is generated using [E3SM](https://e3sm.org/), an Earth System model, the source code of which is available on [GitHub](https://github.com/E3SM-Project/E3SM). 
4 | 5 | |![E3SM](https://i.imgur.com/zIdfW3i.jpg)| 6 | |--| 7 | |Figure 1. Global E3SM simulation showing eddy activity, credits M. Petersen, P. Wolfram and T. Ringler| 8 | 9 | Now, let's say that you are intrigued by the idea of recreating Figure 1 and tweaking the E3SM's source code. We should start with obtaining the source code. Someone might ask: since this project already has a fancy website, why is the source code on GitHub? Let's assume that we successfully got the source code and want to start recreating the figure. Naturally, the next question is how do we install it since there is no executable file in the source code? Maybe you are used to installing software packages using [installation wizards](https://en.wikipedia.org/wiki/Wizard_(software)), or maybe you are comfortable working from the [command line](https://en.wikipedia.org/wiki/Command-line_interface). Which one is possible or preferable for installing this software? The next step after installation is running the software and visualizing the results. So, the question is, for generating the desired outputs, how do we configure the software, what are the required input data, and how do we get them? Let's take it a step further and say that you have some brilliant new ideas and want to implement them in the source code, analyze the outputs, publish the results, and make your code publicly available. Therefore, the questions become: How do we facilitate navigating this seemingly complicated source code? After making modifications, are we allowed to share and republish the modified source code, and if so, how do we go about it? How do we ensure that the republished code is findable and other researchers can reuse and build upon it? 
10 | 11 | The purpose of this module is to answer these questions, provide guidance for streamlining the workflow and ensuring that we give/get proper credits, and last but not least, draw your attention to and promote the importance of contributing and giving back to the Open Science community. 12 | -------------------------------------------------------------------------------- /open-software/files/placeholder.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open-software/lesson0-preamble.md: -------------------------------------------------------------------------------- 1 | # Preamble 2 | 3 | Have you ever marveled at mesmerizing scientific visualizations and wondered how they were generated and whether you can recreate them or even maybe tweak them to produce new results? These types of images have been created by researchers using **research software**. These software products and sometimes their **source codes** are freely available to the public. Reproducing such results and using them to advance the knowledge produced by these types of research software products are among the pillars of open science. For example, Figure 1, is generated using [E3SM](https://e3sm.org/), an Earth System model, the source code of which is available on [GitHub](https://github.com/E3SM-Project/E3SM). 4 | 5 | |![E3SM](https://i.imgur.com/zIdfW3i.jpg)| 6 | |--| 7 | |Figure 1. Global E3SM simulation showing eddy activity, credits M. Petersen, P. Wolfram and T. Ringler| 8 | 9 | Now, let's say that you are intrigued by the idea of recreating Figure 1 and tweaking the E3SM's source code. We should start with obtaining the source code. Someone might ask since this project already has a fancy website why is the source code on GitHub? Let's assume that we successfully got the source code and want to start recreating the figure. 
Naturally, the next question is how do we install it since there is no executable file in the source code? Maybe you are used to installing software packages using [installation wizards](https://en.wikipedia.org/wiki/Wizard_(software)), or maybe you are comfortable working from the [command line](https://en.wikipedia.org/wiki/Command-line_interface). Which one is possible or preferable for installing this software? The next step after installation is running the software and visualizing the results. So, the question is, for generating the desired outputs, how do we configure the software, what are the required input data, and how do we get them? Let's take it a step further and say that you have some brilliant new ideas and want to implement them in the source code, analyze the outputs, publish the results, and make your code publicly available. Therefore, the questions become: How do we facilitate navigating this seemingly complicated source code? After making modifications, are we allowed to share and republish the modified source code, and if so, how do we go about it? How do we ensure that the republished code is findable and other researchers can reuse and build upon it? 10 | 11 | The purpose of this module is to answer these questions, provide guidance for streamlining the workflow and ensuring that we give/get proper credit, and last but not least, draw your attention to and promote the importance of contributing and giving back to the Open Science community. 
12 | -------------------------------------------------------------------------------- /open-software/lesson1-introduction.md: -------------------------------------------------------------------------------- 1 | # Lesson 1: Open software in the context of Open Science 2 | 3 | Learning objectives: 4 | 5 | - Understanding core principles of Open Software 6 | - Learning Open Software terminologies 7 | 8 | ## Introduction 9 | 10 | The software that is created through/during research can be an important research product in and of itself. Open science principles like reproducibility, reusability, and replicability are especially important when it comes to research software. Within this module we will use the terms software and code interchangeably. We use these terms to refer to any product written in a programming language; they can cover anything from a short script to a full software package with a full graphical interface. 11 | 12 | ## Open Science Principles: How they relate to software/code 13 | 14 | Reproducing findings of a published study is imperative for the scientific community. Therefore, results that are produced by scientific software should be **reproducible**, *i.e.*, users should be able to obtain 15 | > "consistent results using the same input data; computational steps, methods, and code; and conditions of analysis" [^NAS2019]. 16 | 17 | If software/code used to make a figure or generate results is not shared along with the results/figures themselves, then it would take significant time, effort, and likely funding, for another researcher to reproduce those same results and determine they were correct. 18 | 19 | We all aim to make significant contributions to our field and can do this by "standing on the shoulders of giants" (Isaac Newton). By sharing the code, trust in the work can increase, and future work can build on it without duplicating effort. 
Therefore, it is important for a research software to be developed in such a way that it can be understood, modified, built upon, or incorporated into other software. This is called **reusability**. [^fair] 20 | 21 | Another important aspect of scientific studies is **replicability**, *i.e.*, studies answering the same scientific questions - but using independent data and/or methods - should find consistent results [^NAS2019] [^NAS2018]. 22 | 23 | > Many communities already have a strong replication tradition, where trust in any scientific result is built when multiple codes achieve results that demonstrate consistent behaviors. By requiring multiple codes to achieve the same scientific finding, replication reduces the impact of individual code errors or numerical issues. [^NAS2018] 24 | 25 | ### Open Software and Open as a Spectrum 26 | 27 | As we've said, sharing code can increase trust and lead to better science by allowing a more thorough review process. However, the degree to which a code is shared and when that code is shared can vary. Any sharing is a step on the spectrum of what we will refer to as **open software**, the most open of these equates to what is known in the computer science and software development industry as **open source software**. Open software can be a spectrum that can be anything from sharing an executable of a code with a description of how it was used to developing the software in a public repository from the start of the project. There are also a variety of license choices that can be made under the umbrella of open software which can allow the developer/researcher to retain various levels of ownership and rights to future commercialization. 28 | 29 | Now, let's take a step back and give formal definitions for some of the terms that we just used. 30 | 31 | Source Code 32 | : Source code is a human-readable (vs. machine-readable) text written in a specific programming language. 
The goal of the source code is to set exact rules and specifications for the computer that can be translated into the machine's language. [^ion] 33 | 34 | Open Source Software (OSS) 35 | : An Open Source Software is distributed with its source code without additional cost that makes it available for use, modification, and distribution with its original rights and permissions. [^syn] 36 | 37 | We should note that researchers are not always able to share their complete code, or software package (*e.g.*, due to national security concerns, data privacy, institutional policies). Again, open software doesn't necessarily mean open-source software and sharing to the level that is allowed by funding agencies, institutions, and security requirements is still a step in the right direction towards a world with more open science. 38 | 39 | From [Openscapes](https://openscapes.github.io/series/mindset.html#open-as-a-way-to-work): 40 | 41 | >Open is a spectrum – what you share, who you share it with, or how you share it. It’s not all-or-nothing. 42 | > What: slides, tweets, blogs, forums, wikis… then also code, data, protocols 43 | >Who: your self, research group, project team, institution…then also public 44 | >How: internal servers, Dropbox … then also Google Drive, GitHub, data repos 45 | 46 | We might also add here: 47 | 48 | **When**: at the start of your project, when it reaches its first fully usable version, at the end during publication, etc. 49 | 50 | Before jumping into the next lessons, let's have a brief overview of the core principals of open source software in general and, more importantly, in the context of research software. 51 | 52 | ## Core Principals of Open Source Software: What research software can move towards 53 | 54 | In the previous section, we provided a formal definition for open software and open source software. 
For better understanding, let's define what these concepts are juxtaposed against: **Closed Source Software** 55 | 56 | Closed Source Software (CSS) 57 | : Closed source software is proprietary software whose source code is not distributed to the public. Therefore, only the original authors who created the code have the right to legally copy, modify, update, and edit the source code. Closed software imposes restrictions on what the end user can do with the application, preventing users from modifying, sharing, copying, or republishing the source code. [^ibm] 58 | 59 | The major differences between CSS and OSS products are two-fold: end users cannot modify CSS products, and, although OSS products might have some restrictions on redistribution, CSS products are usually more restrictive in their terms of usage and redistribution. We can think of OSS as a form of thinking based on intellectual freedom that follows three core principles: transparency, participation, and collaboration. [^ibm] 60 | 61 | Transparency 62 | : Operating in such a way that it is easy for others to see what actions are performed; it implies openness, communication, and accountability. [^wiki] 63 | 64 | Participation 65 | : Actively giving back and contributing to OSS through either committing time and lending skills, or monetary sponsorship. [^os] 66 | 67 | Collaboration 68 | : Collective engagement toward making improvements and advancements through knowledge sharing and creating an inclusive environment. [^os] 69 | 70 | The exchange of ideas and software developed by communities has driven creative, scientific, and technological advancement in nearly every aspect of our lives. Developers share insights, ideas, and code to create innovative software solutions both collectively and individually. Open source software operates with the underlying principles of peer production and mass collaboration, creating more sustainable software development for end users. 
[^ibm] 71 | 72 | Not only can users make any kind of change to the source code, but they can also repurpose it into other new software and distribute their own software. However, there are some nuances on redistribution that we will cover in [Lesson 3](https://hackmd.io/@TOPS-OC3-code/rk2U4xz5q/%2FtDBYARbRTZObZUQuKbFJ6Q). 73 | 74 | Open source software is also sometimes conflated with the free software movement. Usually, "free software" is meant to emphasize freedom in the rights of end-users, but can sometimes be confused as meaning "free of cost". In actuality, neither free software nor open source software denote anything about cost—both kinds of software can be legally sold or given away. Free software and open source software share common values, and the terms are sometimes combined in the popular phrase "free and open source software" (FOSS). [^red] 75 | 76 | To support adopting OSS principles (transparency, participation, and collaboration), several new concepts have been introduced by the open source community. These are especially useful in the move to open science and have produced tools and methodologies that can be used to make research software more open: 77 | 78 | - To facilitate sharing and community engagement, a central storage location for source code is needed; this is called a **Code Repository**. Some examples of such repositories are [GitHub](https://github.com/), [GitLab](https://about.gitlab.com/), and [Bitbucket](https://bitbucket.org/product/). Although source code sharing and community engagement are their most basic capabilities, they go well beyond that and provide a wide range of tools for code *testing* and *version control*. Code testing in general refers to the process of evaluating and verifying that a software product does what it is supposed to do. The benefits of testing include preventing bugs, reducing development costs, and improving performance [^ibm_test]. 
There are various types of tests with different objectives that will be covered in more detail in [Lesson 5](link). Version control is the practice of tracking and managing changes to source code over time. It keeps track of every modification to the code in a special kind of database. If a mistake is made, developers can turn back the clock and compare earlier versions of the code to help fix the mistake while minimizing disruption to all team members [Lesson 5](link). [^atlas] 79 | 80 | - In addition to sharing the source code, software executables require a storage location to facilitate *software packaging* (for developers) and installation process (for end-users). These types of storage locations are called **Software Repositories**. These repositories are usually programming language dependent, for example, [PyPi](https://pypi.org/) and [Conda](https://docs.conda.io) for Python-based software, [CRAN](https://cran.r-project.org/) for R-based software, and [Julia Packages](https://juliapackages.com/) for Julia-based software. However, software packaging cannot always be done using automated services such as PyPi due to complexities of the source code structure itself (*e.g.*, intricacies of the software objectives, use of several programming languages, etc.) and/or its dependencies (other software packages that it depends on). In these situations, *containerization* is a viable option. [Docker](https://www.docker.com/) and [Apptainer](https://apptainer.org/) are example services for containerization. 81 | 82 | ## Summary 83 | 84 | Here we introduced the concept of open software, how it relates to the broader open science principles, and how sharing and openness can be a spectrum. At the most open end of this spectrum is what the computer science/software development community refers to as open source software. The core principles of open source software are introduced as a paradigm towards which research software can move. 
The tools and methodologies developed by the open source community are particularly helpful in opening research software. Next, we'll dive into the benefits and hurdles associated with having open software. 85 | 86 | 87 | 88 | [^ibm_test]: [IBM software testing](https://www.ibm.com/topics/software-testing) 89 | [^atlas]: [Atlassian](https://www.atlassian.com/git/tutorials/what-is-version-control) 90 | [^red]: [RedHat](https://www.redhat.com/en/topics/open-source/what-is-open-source-software) 91 | [^os]: [OpenSource](https://opensource.com/principles) 92 | [^wiki]: [Wikipedia](https://en.wikipedia.org/wiki/Transparency_(behavior)) 93 | [^ibm]: [IBM](https://www.ibm.com/topics/open-source) 94 | [^ion]: [Ionos](https://www.ionos.com/digitalguide/websites/web-development/source-code-explained-definition-examples/) 95 | [^syn]: [Synopsys](https://www.synopsys.com/glossary/what-is-open-source-software.html) 96 | [^fair]: Chue Hong, Neil P., Katz, Daniel S., Barker, Michelle, Lamprecht, Anna-Lena, Martinez, Carlos, Psomopoulos, Fotis E., Harrow, Jen, Castro, Leyla Jael, Gruenpeter, Morane, Martinez, Paula Andrea, Honeyman, Tom, Struck, Alessandra, Lee, Allen, Loewe, Axel, van Werkhoven, Ben, Jones, Catherine, Garijo, Daniel, Plomp, Esther, Genova, Francoise, … RDA FAIR4RS WG. (2022). FAIR Principles for Research Software (FAIR4RS Principles) (1.0). 97 | [^NAS2019]: National Academies of Sciences, Engineering, and Medicine; Policy and Global Affairs; Committee on Science, Engineering, Medicine, and Public Policy; Board on Research Data and Information; Division on Engineering and Physical Sciences; Committee on Applied and Theoretical Statistics; Board on Mathematical Sciences and Analytics; Division on Earth and Life Studies; Nuclear and Radiation Studies Board; Division of Behavioral and Social Sciences and Education; Committee on National Statistics; Board on Behavioral, Cognitive, and Sensory Sciences; Committee on Reproducibility and Replicability in Science. 
Washington (DC): National Academies Press (US); 2019 May 7. 98 | [^NAS2018]: National Academies of Sciences, Engineering, and Medicine 2018. Open Source 99 | Software Policy Options for NASA Earth and Space Sciences. Washington, DC: The 100 | National Academies Press. . 101 | -------------------------------------------------------------------------------- /open-software/lesson2-pros-cons.md: -------------------------------------------------------------------------------- 1 | # Lesson 2: The Pros and Cons of Open Software 2 | 3 | Learning objectives: 4 | 5 | - Benefits of Open Software for developers and users 6 | - Understanding the responsibilities of developers and users for a thriving Open Software culture 7 | 8 | ## Introduction 9 | 10 | This lesson addresses particular benefits of open-software, presenting how you as a researcher can benefit from it, and also how it can improve your research, moving yourself and your teams towards Open Science. We will also address some common challenges - and misconceptions - of adopting open software, and how to overcome them. 11 | 12 | ## Benefits of open software 13 | 14 | Open software offers a multitude of advantages to both developers and users. Several benefits of open software are highlighted in this section. 15 | 16 | ### As a developer/provider 17 | 18 | - **High Visibility**: Publishing open software enables the repository to be more reachable and attainable. It can broaden the audience from a diverse group and draw more attention to the software repository. 19 | 20 | - **Long-term Sustainability**: Subsequently, open software allows more people to access the repository and can cultivate more users to be involved in its development. It results in the long-term sustainability of the software. [^helda] Since it is unlikely to have perfect software, having a larger user base is likely to have more collaboration or feature requests that can directly contribute to some improvements in the software. 
"Given enough eyeballs, all bugs are shallow." [^linus] Testing out software with a large base of users can easily detect the issues in the software, and they can submit bug reports or submit proposed fixes directly. 21 | 22 | - **Quality Improvement**: Besides bug fixes, the contributions can also be in feature enhancement, such as submitting additional features to the software repository or proposing modified codes that increase the effectiveness of the software. As a result, open software that comes with community support will tend to have continuous improvement, unlocking the potential to create new inventions, and produce better quality software versions. By ensuring the quality of the open software, it can gain users' trust to rely on it rather than redeveloping a software, therefore, minimizes the duplication of efforts, both within an organization and across organizations, by allowing for individual components to be shared. 23 | 24 | - **Future Employability**: As a developer or maintainer of open source software, your skills and experience are an important asset to improve your chances of getting a job. [^springer] Experience in developing open software is a positive portrayal of the abilities as it helps in demonstrating technical abilities. In addition, it also demonstrates the personality and work ethic in software development. If someone has experience working on complex software development and maintenance, it can make the profile outstanding, especially to companies that will take into account the contributions of the candidate to open software. The hiring manager may also view the product or shared code. Hence, open source provides visibility into both how a candidate solves problems, and how they collaborate in a team. 
25 | 26 | 27 | 28 | ### As a user [^Salem] 29 | 30 | - **Accessibility**: Shared code certainly increases the democratization of science; it allows a more diverse and inclusive community to use the open software without a cost-prohibitive barrier. 31 | - **Flexibility**: Open software provides users a certain freedom to utilize the software for any purposes as they wish. It also allows users to make changes freely on the software and customize it according to their needs or even redistribute the software based on the license that has been applied. 32 | - **Knowledge Sharing**: Open software is also a great learning opportunity for the community [^synopsys], it can help to achieve knowledge sharing through the community, which in turn, increases motivation for a continued practice. 33 | 34 | ## Are there any disadvantages of open software - and if so, how to mitigate them? 35 | 36 | Making a software open source and valuable to the community requires additional efforts and considerations. In this section, we will discuss responsibilities that come with this decision and provide you with guidance for maximizing the impacts of your efforts. 37 | 38 | ### As a user 39 | 40 | #### Require a skill set 41 | 42 | Open software comes in many forms and shapes. There are open-source codes that come as packages available in a repository for a programming language or environment (*e.g.* PyPi for Python, CRAN for R, Conda for a variety of languages). Others are code that require installation from scratch. Even for skilled programmers, this setup can incur costs (time and financial). 43 | 44 | So, if you are familiar with a programming language that offers repositories which are easy to download from within your environment (*e.g.*, R), you can start from there, and build up your confidence and skills. 
45 | 46 | To compile and generate an executable code from a repository from scratch, you will need to be able to check for the necessary computation environment, check and install dependencies, and compile the code. Programming language might be a barrier, as well as operating within a command line environment. The good news is that there are many resources to help you go through these stages. Widely used open software is usually well documented, with step-by-step instructions, and some even have a community which can offer support for installation and running their code. Sometimes, developers share alongside their open-source code an executable version for your operating system. *E.g.*, the repository of [Stock Synthesis](https://github.com/nmfs-stock-synthesis/stock-synthesis/releases)[^Methot], a software used for stock assessment of fisheries populations, offers both the source code and compiled versions for different operating systems. So these are good choices for a beginner. 47 | 48 | It bears repeating that while learning these skills incurs a cost, by doing so you might not only gain access to a useful research tool, but might also gain experience and skills that are useful for your career. 49 | 50 | #### Deprecation 51 | 52 | Technology changes fast, and software - open and closed - becomes deprecated. If you rely on a certain open-source tool for your work, you run the risk of it becoming deprecated. It can happen to projects that are not maintained, or no longer maintained, for a number of reasons. 53 | 54 | If this happens to code you use, you can offer to become a contributor to the open software and update the code yourself. This will require programming skills, but it is a viable route. You can also team up with other users for a group effort. 
55 | 56 | If you are choosing a tool and are not interested to fix depreciation issues in the future, aim for widely-used community open software, which are maintained by numerous people and thus, less likely to be depreciated. 57 | 58 | #### Security concerns 59 | 60 | Open-software can be perceived as to present more vulnerabilities than proprietary software - when all software can present vulnerabilities. You should check if your institution has an open software security policy in place - if so, follow their guidelines to assure compliance and up-to-date security protocols [^Linux]. To minimize security risks, we also encourage you to download code/software from an authoritative source - such as the original project repository - rather than a third party site. 61 | 62 | However, an important benefit of open source is that you can see exactly what the code is doing and know what are the dependencies, what is useful if any of them becomes vulnerable. You don't have the same level of transparency with a closed-source code. Open source codes also might have (some or many) eyes on them, which can result in better oversight. Widely-used open software will have a community of researchers and developers working on its code, looking closely at inputs, outputs and computer performance. But always, check with your institution about their requirements, guidelines and policies regarding open-source software. 63 | 64 | ### As a developer/provider 65 | 66 | #### Open Software can require extra work 67 | 68 | Some extra work might be required to share code that is already written to improve readability (*e.g.*, comments, variable names, indentation) and documentation (*e.g.*, README and code of conduct files) of your work, so others can easily understand it and use it. However - and we cannot stress this enough - open software is a journey, not a destination. How much to change and add is totally up to you. The important part is to publicly share your code. 
69 | 70 | By writing code that is easily readable by humans, you can make it more usable even to yourself! It will save you time when you want to re-use it years later. Moreover, the more upfront effort you put into developing an accessible code, the more others will be able to use it - which might lead to more collaborations, better feedback, and career opportunities. 71 | 72 | There is also a time commitment for basic steps of creating documentation, choosing a license, and getting a DOI. Our module gives you an understanding of these terms, providing you a checklist with clear steps to sharing your code. We also point you to resources to make this process smooth and save you time in decision-making. 73 | 74 | After sharing your code in a repository, you will have a reliable backup that won't depend on your own hard drive - and you have many free options to choose from! Added benefits are that by creating a license, you are allowing others to use your work on the terms you will choose. By having a [DOI](link), your code is a findable (by online search engines) and [citable](http://citeas.org) reference, and thus you will get credit for your work! You can also learn more about DOIs in the lesson about [Licenses](link). 75 | 76 | #### Becoming a [maintainer](vocab) 77 | 78 | Maintaining an open software (particularly open-source) long-term can bring its own special set of challenges - from the time commitment, to the procurement of funding, to navigating requests from users. Maintaining your code after sharing it is a personal choice, and you can step out of this role at any time you choose (more about this in [Lesson 5](link)). 79 | 80 | #### Sustainability 81 | 82 | Despite the importance of open-software for researchers, support and incentive for open-software development and maintenance are frequently inadequate [^cite1] [^cite2] [^cite3] [^cite4] [^cite4.5] [^cite5].
As reported by the Australian Research Data Commons (2022): 83 | 84 | > Software is an often invisible part of research, produced quickly within a funding window, often struggling to be maintained beyond that. [^cite1] 85 | 86 | Contributions to open software within traditional academia don’t carry the same weight as publications - software is often seen as a by-product of research, and dedicated funding is unusual [^cite1] [^cite2] [^cite3] [^cite4] [^cite4.5] [^cite5]. As reflected by reports and analyses from several countries, a shift in paradigms of funding and career advancement is required, along with an increase in software literacy, so open-software can be more sustainable. 87 | 88 | While this is a larger, structural issue that cannot be easily overcome by an individual, we have strength in numbers. More researchers in the open-source community will result in more visibility of these issues, both for our institutions and funding entities. As more researchers move towards an open, collaborative framework of science, it is expected that more changes will happen to the current paradigm, allowing a fruitful future for open-software. 89 | 90 | ## Summary 91 | 92 | In this module, you reviewed particular benefits of open software to improve: 1) visibility of your work, 2) long-term sustainability, 3) quality of your software, and 4) your career prospects. You also explored how open-software furthers the open-science principles, increasing 1) accessibility, 2) freedom, and 3) democratization of science. 93 | 94 | Despite its multiple benefits, adopting and creating open-software also brings challenges. In this module, we addressed some common challenges, with some tips to overcome - perceived and real - barriers to open-software. 95 | 96 | Lastly, we want to emphasize that adopting open-software (as a user or as a developer) in your research is a journey.
As with the practice of open-science, there is a spectrum, and you make your own choices of how, what and when you are able to share, given your personal skill set, institutional policies, time and funding limitations. The most important thing is to take the first steps, and continue this journey together with the open-source community. 97 | 98 | 99 | 100 | [^Salem]:[Open Source Software (OSS) Quality Assurance: A Survey Paper](https://www.sciencedirect.com/science/article/pii/S1877050915017172) 101 | 102 | [^helda]:[Forking: the Invisible Hand of Sustainability in Open Source Software](https://helda.helsinki.fi/handle/10138/157663) 103 | 104 | [^springer]:[Open source and accessibility: advantages and limitations](https://journalofinteractionscience.springeropen.com/articles/10.1186/2194-0827-1-2) 105 | 106 | [^synopsys]:[Synopsys](https://www.synopsys.com/glossary/what-is-open-source-software.html#:~:text=Open%20source%20fosters%20ingenuity%3B%20programmers,learning%20opportunities%20for%20new%20programmers.) 107 | 108 | [^linus]:[Linus’ Law](http://www.catb.org/~esr/writings/cathedral-bazaar/cathedral-bazaar/) 109 | 110 | [^Methot]:Methot Jr, R. D., & Wetzel, C. R. (2013). Stock synthesis: a biological and statistical framework for fish stock assessment and fishery management. Fisheries Research, 142, 86-99. 111 | 112 | [^Linux]:[Linux Foundation](https://www.linuxfoundation.org/) 113 | 114 | [^cite1]: ARDC Ltd. (2022). A National Agenda for Research Software. Viewed online at: 115 | 116 | [^cite2]: NAA, N. A. of A. (2021). Current state assessment | naa.gov.au. September, 1–127. 117 | 118 | [^cite3]: Akhmerov, A., Cruz, M., Drost, N., Hof, C. H. J., Knapen, T., Kuzak, M., Martinez-Ortiz, C., Turkyilmaz-van der Velden, Y., & van Werkhoven, B. (2020). Raising the profile of research software: Recommendations for funding agencies and research institutions in the Netherlands. Zenodo. 119 | 120 | [^cite4]: Katz, D.S., Druskat, S., Haines, R., Jay, C. and Struck, A., 2019.
The State of Sustainable Research Software: Learning from the Workshop on Sustainable Software for Science: Practice and Experiences (WSSSPE5.1). Journal of Open Research Software, 7(1), p.11. DOI: 121 | 122 | [^cite4.5]: National Academy of Sciences (2018). In Open Source Software Policy Options for NASA Earth and Space Sciences. 123 | 124 | [^cite5]: Mangul S, Mosqueiro T, Abdill RJ, Duong D, Mitchell K, et al. (2019) Challenges and recommendations to improve the installability and archival stability of omics computational tools. PLOS Biology 17(6): e3000333. 125 | -------------------------------------------------------------------------------- /open-software/lesson3-licensing.md: -------------------------------------------------------------------------------- 1 | # Lesson 3: Licensing/Ownership & DOIs 2 | 3 | Learning objective: 4 | 5 | - Understanding ethical and legal aspect of giving credits and attributions 6 | - Learning about existing open source licenses and Digital Object Identifier 7 | 8 |
9 | Disclaimer: the contents of this lesson are for educational purposes only. They do not constitute legal advice and should not be used as such. 10 |
11 |
12 | 13 | ## Introduction 14 | 15 | After deepening your understanding of the reasons to use open-software in the context of open-science, we here address the first considerations when using an open software tool in your research. First and foremost, if you are going to be building your own code on prior work you need to choose a software that is **open**, *i.e.,* that you are allowed to use, modify and redistribute. As a developer, you also need to ensure that you are sharing a product that is open - and thus, usable - to others. This is presented on the section [Licenses](link). 16 | 17 | Then, we present how you get credit for your work, and how you give credit to others' work. This is the content of the section *Attribution and citation*. 18 | 19 | At the end of this lesson, you will be able to: *Choose and abide by appropriate usage and referencing standards of open-software*. 20 | 21 | ## Licenses 22 | 23 | A software license is a legal document that grants users particular rights to the use of a certain software. This license can take many forms, but in many cases they outline contractual obligations (if any exist) between the company/software developer behind the software and the end user, what the user can do with the software, who the user can distribute the software to (if any such distribution rights exist) and the length of time the user has the right to use the software. 24 | 25 | A user cannot (technically and ethically), use a software without a license! A user can reach out to the developer/owner to ask for permission, and go ahead *if* the owner/developer furnishes written permission. But, if you share your software without a license, no one can use it without your written permission! 26 | 27 | ### Types of licenses 28 | 29 | A license can fall under several categories. License types have general definitions of what can be done with the software. 
By picking a type of license, or by understanding what type the license of a software you're considering using is, you'll be able to navigate the license process more quickly than reading each license individually and interpreting the permissions. An overview of types of licenses is given in the table below[^licensetypes]: 30 | 31 | | Public domain license | Lesser general domain license | Permissive | Copyleft | Proprietary | 32 | | -------- | -------- | --- | --- | -------- | 33 | |Anyone can use or modify the software.|Can link to open source libraries and code can be licensed under any license type.|Has some requirements for distribution and modification. |Licensed code can be distributed or modified if all the code involved is licensed under the same license.| Software cannot be copied, modified or distributed | 34 | 35 | Summary of selected attributes of licenses types [^Morin]: 36 | ![Table](https://i.imgur.com/usGIw8P.jpg) 37 | 38 | Some of the common licenses used in open software are: 39 | 40 | 1. MIT license 41 | 2. Apache License 2.0 42 | 3. Mozilla Public License 2.0 43 | 4. BSD 3-Clause "New" license 44 | 5. GNU General Public License (GPL) 45 | 6. Common Development and Distribution License 46 | 47 | For more information on different types of licenses please refer to the ([Open Source Initiative OSI](https://opensource.org/licenses/category)). 48 | 49 | ### How to choose a license 50 | 51 | There are a number of steps that have to be made before choosing a particular license. Arguably one of the first decisions to be made is based upon whether you intend to use the code for commercial purposes or not, or at least foresee it as a possibility in the future. Some licenses are more favorable for commercial purposes than others, such as the *General Public License, version 2*. 52 | 53 | The next decision that has to be made is relating to the issue of distribution. When using other software as a dependency, you should always be wary of their licenses. 
Some licenses enforce certain types of licenses upon redistribution. The GNU GPL, for instance, is incompatible with proprietary licenses, because it requires the combined work to be licensed under the GPL, with no additional restrictions allowed. Having a part of the work under a proprietary license is such an additional restriction, so you cannot distribute such a combination (unless the copyright owner of the GPL code gives special permission). 54 | [^turing] 55 | 56 | For licensing open software, it is always good practice to consult with the [Open Source Initiative (OSI)](https://opensource.org/) website. They provide a list of approved licenses that guides you through this process. Remember that a first step is always to consult with your institution (if applicable). You should ensure that you are complying with any applicable local laws and any policies set by your employer and/or funding entity. 57 | 58 | ![schematic](https://i.imgur.com/IzeJix8.jpg) 59 | 60 | ### Additional Resources 61 | 62 | - [Choosing a License](https://choosealicense.com) 63 | - [Turing Way on licensing](https://the-turing-way.netlify.app/reproducible-research/licensing.html) 64 | 65 | ## Attribution and citation [^Katz][^Smith][^Chue] 66 | 67 | Both when choosing a license and publishing your software for future citation, a decision has to be made in relation to the issue of *attribution*, *i.e.*, crediting a person or group of people or other entity with a particular action in relation to the software. This can be thought of as the software/code equivalent to authorship on an academic paper. It is important to consider this to avoid accusations of plagiarism or copyright infringement. There is a short discussion in the final lesson in this module regarding ethical considerations on how contributions can be considered for authorship/attribution/ownership. 
68 | 69 | When deciding to cite a software or code that was used in your research you can start with the question: is this research software? Research software includes source code files, algorithms, scripts, computational workflows, and executables that were created during the research process or for a research purpose. Software components (e.g., operating systems, libraries, dependencies, packages, scripts, etc.) that are used for research but were not created during or with a clear research intent should not be cited (e.g. Microsoft Word, Linux, Python --the language itself; specific packages might be citable in this context). This differentiation may vary between disciplines. Some examples of research software that would be cited are [E3SM](https://e3sm.org/), [SciML](https://sciml.ai/), and [Stock Synthesis](https://github.com/nmfs-stock-synthesis/stock-synthesis). 70 | 71 | The majority of open source software licenses require some degree of attribution, and a small minority (such as the 0BSD) do not. The license will also dictate where the attribution must be displayed - some licenses will require the user to include attribution in a dedicated file such as the software license agreement. 72 | 73 | ### Digital Object Identifier (DOI) 74 | 75 | By having a persistent identifier, a software version can be cited. A digital object identifier (DOI) is a persistent identifier or handle used to uniquely identify various objects. It is standardized by the International Organization for Standardization ([ISO](https://www.iso.org/home.html)) as ISO 26324. In contrast to dynamic web addresses such as URLs, DOIs are static, *i.e.*, do not change over the life of a document, and point to the location of the document on the internet. You can get a DOI for your own software/code by adding it to a preservation repository.
76 | 77 | Just as we publish scientific findings in writing in journals to ensure their preservation over time, their supplementary material, *e.g.*, source code and produced data, should also be stored in a permanent location. We call these preservation repositories. Some of these repositories are general-purpose such as [Zenodo](https://zenodo.org/) and [Figshare](https://figshare.com/), and some are more research field-oriented such as [Hydroshare](https://www.hydroshare.org/). 78 | 79 | It is important to keep in mind that a DOI refers to a static version of your code, and so you'll need to get a new DOI for each version you release and want cited. By using the same repository each time you need a DOI for a new version, you can be sure that when a user looks for your DOI they are directed towards the most recent version. 80 | 81 | ### Citing code without a DOI 82 | 83 | As a user, if you'd like to cite a software that does not have a DOI you can use [Software Heritage](https://www.softwareheritage.org/) to create a SWH-ID which is also a citable persistent identifier, but can be created for codes that are not your own. This should only be done *after* ensuring a DOI is not available; otherwise there can be multiple identifiers being used for the same piece of software. 84 | 85 | ### Attribution for pieces/snippets of code 86 | 87 | While DOI and SWH-ID allow citations of full pieces of software/code, there is also the case where a small code snippet or section might be copied into another code. It is common practice to take a few lines of code to solve a piece of a problem from websites such as [StackExchange](https://stackexchange.com/) or [Code Ranch](https://coderanch.com/), but there should still be an attribution if no changes are being made. This can be done effectively with a comment that includes a link to the webpage from which the code was taken (most sites have an option to create a shortened shareable link that is more code friendly).
88 | 89 | ### Publishing open software in peer-reviewed journals 90 | 91 | It is also possible to publish open software or a research article detailing the inner workings of that software in peer review journals. A general example is the [Journal of Open Source Software](https://joss.theoj.org/); there are also more discipline specific journals such as [Astronomy and Computing](https://www.journals.elsevier.com/astronomy-and-computing) and [Environmental Modeling & Software](https://www.sciencedirect.com/journal/environmental-modelling-and-software). The peer review process for some of these journals may include a review of the code itself, some may be focused just on the describing journal article that accompanies it. These publications will also come with a permanent identifier as is customary with most journal articles. 92 | 93 | ## External Requirements 94 | 95 | There are various legal considerations to keep in mind with regard to software and code you write. For example, these may be considered intellectual property, and you may wonder who has ownership over it. Generally speaking much of this is dependent on your employment and funding situation at the time you did the work. Your institution may have claim to part or all of the work product, however it is highly variable, and your institutional offices should be contacted to understand this better. 96 | 97 | There may also be other institutional, governmental, or other legal policies that may be dependent on your region. Please make sure you understand your locality's laws and regulations regarding the sharing of research software and follow your institution and funding agency's requirements (if any) on licensing and intellectual property. 
98 | 99 | ### Additional Resources 100 | 101 | * [Code publication](https://scicodes.net) 102 | * [Computational Infrastructure for Geodynamics - example of a preservation repository that provides peer review](https://geodynamics.org) 103 | * [When to cite software](https://f1000research.com/articles/9-1257/v2) 104 | * [CiteAs: a resource for finding the correct attribution of a research product](http://citeas.org) 105 | 106 | ## Summary 107 | 108 | Here, we reviewed that a software license is a legally binding document, made between the developer and the user of a software, which outlines how that software can be used and distributed. Open software will carry licenses that follow the [Open Source Initiative (OSI)](https://opensource.org/licenses) definitions: allowing the software 109 | "to be freely used, modified, and shared." [(Open Source Initiative OSI)](https://opensource.org/licenses) 110 | 111 | We have presented the major categories of licenses that might fall under the *open-source* definition, and what considerations to take when choosing a specific software and/or license for your software, *i.e.* 1) what is the intended use of this software?, 2) how can others reuse it?, and 3) what are the policies of my institution and local laws regarding open-software use and dissemination? 112 | 113 | We have also learned about proper attribution - how to get credit for your work (DOI, archival of code, publishing options), and how to cite others' work. 114 | 115 | 116 | 117 | [^Morin]:Morin, A., Urban, J., & Sliz, P. (2012). A quick guide to software licensing for the scientist-programmer. PLoS Computational Biology, 8(7). 118 | 119 | [^Smith]:Smith et al. (2016). Software Citation Principles. PeerJ Comput.
Sci., DOI 10.7717/peerj-cs.86 120 | 121 | [^Chue]:Chue Hong, Neil P., Katz, Daniel S., Barker, Michelle, Lamprecht, Anna-Lena, Martinez, Carlos, Psomopoulos, Fotis E., Harrow, Jen, Castro, Leyla Jael, Gruenpeter, Morane, Martinez, Paula Andrea, Honeyman, Tom, Struck, Alessandra, Lee, Allen, Loewe, Axel, van Werkhoven, Ben, Jones, Catherine, Garijo, Daniel, Plomp, Esther, Genova, Francoise, … RDA FAIR4RS WG. (2022). FAIR Principles for Research Software (FAIR4RS Principles) (1.0). 122 | 123 | [^licensetypes]:[licensetypes](https://www.techtarget.com/searchcio/definition/software-license#:~:text=A%20software%20license%20is%20a,the%20software%20without%20violating%20copyrights.) 124 | 125 | [^turing]: [Turing Way Compatibility](https://the-turing-way.netlify.app/reproducible-research/licensing/licensing-compatibility.html) 126 | -------------------------------------------------------------------------------- /open-software/lesson4-code-management.md: -------------------------------------------------------------------------------- 1 | # Lesson 4: Code management/Quality 2 | 3 | Learning objective: 4 | 5 | - Understanding some best practices for publishing Open Software 6 | - Learning the basics of code management 7 | 8 | ## Introduction 9 | 10 | While we maintain that sharing software at all is a great first step regardless of its state, the more the code is kept clean, maintained, and documented, the more others will be able to cite, use, and contribute to it. 11 | 12 | ## What does it mean for software/code to be of good quality? 13 | 14 | There are two perspectives that you can take when engaging with this lesson: a user of open software, or a developer/provider of open software. As a user, you will want to make sure that a code or software project you are considering using in your research/project is quality. As a developer/provider, you will want to make sure your project is of high enough quality that others will want to use and engage with it.
When we say "quality" code, we are referring to precisely that, a software/code that a user can be confident in using. 15 | 16 | Here we outline some baseline expectations for open software. While there are definitely good open software projects out there that do not include all of these items (and, unfortunately, plenty of projects out there that contain many of these items but still don't function well), this guide will assist in ensuring the software/code that you develop/use is quality. 17 | 18 | ### Good documentation 19 | 20 | Good documentation for code is possibly the most important item on this list for creating a quality code. This will help a user know what the software does and how it can be used, but also can be a real time saver for a developer when going back to look at code they haven't looked at in a while. 21 | 22 | #### The README file 23 | 24 | The first stop for a user when they approach a new project should be the README file. Aptly named, this file should contain orientation information that will help a user understand the project's purpose as well as shows examples of how it can be used, and lists most other important information the creator deems necessary. Note that there is no one agreed upon convention for the location of these documentation pieces, so we encourage exploration of the software you're interested in. Some information we describe as in a README file may be moved into its own file in some conventions, e.g. having installation instructions in an INSTALL file, but the README is still usually the best place to start. Keeping that in mind, if you are developing a code/software for use by others, they will expect a descriptive and useful README, without one using your code may be a nonstarter for many. 25 | 26 | [Here is an example of a README file](https://github.com/MillionConcepts/lhorizon) from a NASA-funded project that shows many of the specifics we are going to discuss below including multiple installation options. 
As you read the suggested parts of documentation below feel free to reference this for an example. 27 | 28 | Let's dive into the specifics of information you should include/find in a README file. First, a description of what the software does: its purpose, the problem it's solving. You don't need to write a whole academic paper here, a sentence or two is fine. If you do happen to have a research paper written on the topic no one would be upset if you link it here, though do be careful that any linked papers are either (a) not behind a paywall or (b) if it _is_ behind a paywall, that the important information a user would need to use and understand your software is reiterated separately within the code documentation. 29 | 30 | A compatibility description is also necessary. Sometimes this is wrapped into the installation instructions and that is acceptable. Here the operating systems (e.g. Linux, Windows, macOS -- and their versions) that the software/code works on are listed. If the code runs in a browser, which browsers does it work with? There are many tools for testing the compatibility of code across operating systems and environments; we won't get into those here as they can be specific to the coding language you're working in. 31 | 32 | If installation instructions are not in their own file, they'll live inside the README. These should be written with very little prior knowledge expected of the user. Most people are used to downloading a software package, double-clicking on the executable, and having a setup wizard walk them through any required steps. Setups such as this are achieved through packaging. Packaging bundles all the necessary pieces for a software to run, usually including dependencies, and distributes it to the user as one "package". Packaging software can make installation a lot simpler for users and allow it to be installed consistently, which aids reproducibility.
Most open software won't be packaged to the double-click-with-setup-wizard level and some won't be packaged at all. They will require a bit more up front work for the user, but an advanced knowledge of installation practices shouldn't be assumed. For example, an exact command that can be copied and pasted into the command line is a lot more helpful than something like "clone the repo" or "install using git pip". 33 | 34 | Usage examples are another important part of a README document. While how to run and use the software may be obvious to the developer, many times this is not the case for the user. Simple/small usage examples are great for the README file. If there are more complex examples that require input files or that are interactive for the user and the programming language you are using supports interactive environments, such as [Jupyter](https://jupyter.org/) (for R, Python, and Julia), [Pluto](https://github.com/fonsp/Pluto.jl) (for Julia), [Quarto](https://quarto.org/) (for R, Python, and Julia), and [RStudio](https://www.rstudio.com/) (for R), these can be used and included in a repository and pointed to in the README. If interactive environments are not an option for the language you are using and your usage examples are necessarily complex, consider writing a standalone script and including a pointer to this with instructions on how to use and run that example script in the README. 35 | 36 | If relevant, the README is also one of the places you may find descriptions of the outputs of a software/code. Both what kind of objects these may be in terms of their type (e.g. string, integer, etc.) and in their general description (e.g. a list of names, the amount of rain the model calculated, etc.). 37 | 38 | As the README is the first place a user will look, this is also where you can find other notes and caveats of using the software. 
This should include at least something on the state of the software: is it in active development (meaning it may have some bugs and may not always work as expected), consistently maintained (meaning the software is updated when necessary--like when a dependency is updated or a bug is reported), or here for posterity purposes only (meaning the author/developer/researcher will not be working to maintain or improve this code any further)? How can you contact the developer/researcher that created this software/code? How can issues/bugs be reported (if at all)? This would also be a good place to list any known bugs/issues, so you don't get repeat requests. 39 | 40 | The README is also a great place to acknowledge team members that worked on the code/project as well as agencies and grant numbers that funded the work. 41 | 42 | #### Dependencies 43 | 44 | The dependencies -- the other software on which the software/code relies -- should be listed somewhere in the documentation, but are not always in the same place depending on the coding language. For example, in Python software, it is common to include a file titled something like `environment.yml` which will list dependencies and which can be used to install them quickly and easily. Other conventions may include listing them in the README file; a README can also be used to point to an additional file that lists dependencies (such as the `environment.yml` or `requirements.txt`). 45 | 46 | #### License 47 | 48 | A license file should be included with your documentation. This is expanded upon more in another lesson in this module, but without one, the code/software is technically and ethically not allowed to be used at all by anyone other than the author/developer. 49 | 50 | #### The `CONTRIBUTING.md` file 51 | 52 | One of the great benefits of open software is that it enables contributions from the community. The `CONTRIBUTING.md` and `CODE_OF_CONDUCT` files in software can be referenced for information on how to do this.
This is expanded upon more in a later lesson. 53 | 54 | #### Documentation Checklist 55 | 56 | - [ ] Description of the software and the problem it solves 57 | 58 | - [ ] Compatibility description 59 | 60 | - [ ] Dependencies 61 | 62 | - [ ] Installation instructions 63 | 64 | - [ ] Usage examples (perhaps including an interactive notebook) 65 | 66 | - [ ] Development status of the software (under development, actively maintained, etc.) 67 | 68 | - [ ] Contact information 69 | 70 | - [ ] How to report issues/bugs (and a list of any known issues/limitations) 71 | 72 | - [ ] Acknowledgments of team and funding 73 | 74 | - [ ] License 75 | 76 | - [ ] Contribution guidelines 77 | 78 | - [ ] Code of conduct 79 | 80 | Additionally, a GitHub template from NOAA for open software documentation can be found [here](https://github.com/NOAA-OWP/owp-open-source-project-template). 81 | 82 | ### Clean/readable code 83 | 84 | Code for software is very rarely written only for one individual. Code typically has to be read and evaluated by others. In private companies, this is usually because software is written by a group of programmers and so it is important that programmers are able to read and understand the code, both in order to improve it and to "debug" or fix it. Open software also operates similarly: there may be many programmers working and contributing to a particular project from different backgrounds and walks of life. With different programmers with different backgrounds collaborating together, it's important that code is transparent and can be easily understood by others. This is sometimes referred to as "clean code". 85 | 86 | Clean code is code that is easily understood by others. Clean code has a number of advantages. One advantage is that it is easier to spot if or whether something is wrong with the code (known as "debugging"). Another advantage is that code that is "clean" is more likely to be shared than code that is not. 
This is fundamental to open software, which aims to be reproduced as widely as possible. There are a number of principles that should be adhered to when writing clean code. 87 | 88 | #### Code Comments 89 | 90 | Arguably one of the most important is that code should be commented. Comments are annotations that help other programmers reading the code to understand what is going on. In many languages, they are designated by the sign `//` or `#` or `/* */`. As a rule, more comments are better than fewer, but this should be qualified with the warning that comments should not explain the obvious. For example, in the language JavaScript, the following would be an inappropriate comment: 91 | 92 | ```var a = 5; //I'm assigning the value of 5 to the variable a.``` 93 | 94 | It is inappropriate because the code is self-explanatory. 95 | 96 | #### Descriptive naming 97 | 98 | Another point to bear in mind when it comes to clean code is that variables, functions, and similar entities should be given descriptive names as opposed to vague names. These are names that, when another programmer reads them, instantly give an idea of what the variable or function is. For example, the variable name `colourOfCat` is a good name because it describes what the variable holds: the color of a cat. As a rule, the more descriptive a name for a variable, function, etc., is the better. Names for variables, functions, etc. should avoid using words that are likely to be keywords - names with reserved meanings in many languages - such as "while", "for", "override" and so on. Needless to say, names for variables, functions, etc. should similarly avoid giving offense and clean code should consider the sensitivities of those from different backgrounds. 99 | 100 | It's frequently the case that code may point to external files; where possible, a programmer should ensure that the external file has a descriptive filename. In addition, clean code should also conform to programming conventions.
For example, it's common in many programming languages to use camel case to name variables, such as `colourOfCat` rather than `COLOUROFCAT`, but one would do well to ascertain what a convention may be for a particular language. 101 | 102 | #### Whitespace and indentation 103 | 104 | Lastly, clean code should contain sufficient spaces between lines of code (also known as whitespace) and sufficient indentation so that they are easily discernible. Sometimes code that does not contain sufficient whitespace can go through a process known as _beautification_ or _prettifying_ that helps it become more readable. Ultimately, a key test for whether code can be considered "clean" is the following: if you left the code and came back to it 2 years from now, would you be able to easily understand it? 105 | 106 | ## Summary 107 | 108 | In this lesson we go over two main topics regarding markers of quality code: (1) good, descriptive documentation and (2) clean, readable code. As a user, documentation can be the difference between spending hours or days trying to understand a code and being able to use it right out of the box. As a developer/researcher, documentation improves the reproducibility and reusability of your code and lets others know what to expect both of your code and of you yourself as a maintainer. Next, we'll discuss maintaining quality code. 109 | 110 | ## References 111 | 112 | - Lee BD (2018) Ten simple rules for documenting scientific software. PLoS Comput Biol 14(12): e1006561. 113 | - Anzt H, Bach F, Druskat S et al. An environment for sustainable research software in Germany and beyond: current state, open challenges, and call for action [version 2; peer review: 2 approved] F1000Research 2021, 9:295 114 | - Martin, R. C. (2008). Clean code: A handbook of agile software craftsmanship. Prentice Hall.
115 | -------------------------------------------------------------------------------- /open-software/lesson5-vesion-control.md: -------------------------------------------------------------------------------- 1 | # Lesson 5: Maintain good code quality 2 | 3 | Learning objectives: 4 | 5 | - Understanding basics of Version Control 6 | - Learning the basics of testing in code development 7 | - Understanding the responsibilities of Open Software developers 8 | 9 | ## Introduction 10 | 11 | We've talked about markers of quality software in the prior lesson: good documentation and clean, readable code. The reality is that for most software, this is a journey, and it is going to continue to change and develop over some period of time. Here, we discuss version control, testing, and responsibilities after sharing. These topics are centered around the evolution of your code and ensuring the work you've done to make quality open software is able to endure. 12 | 13 | ## Version control 14 | 15 | Open source code can change over time. This brings several challenges to researchers developing and using ever-changing software. We covered the importance of reproducibility for open-software - and open-science as a whole. Now, how can we achieve reproducibility with a changing code source? That is done by keeping track of changes to our source code, using version control. 16 | 17 | Version control can be done with tools and systems designed to manage changes not only to source code, but also to documents, websites, and datasets. [Google Docs](https://docs.google.com), for instance, has its own complex version control. This allows you and your collaborators to have access not only to the most up-to-date Google document you all are working on, but to the complete history of changes. So, if something goes wrong in a document: a child includes a thousand smiley faces in the text, a cat walks on the keyboard and deletes an entire section - you can just revert to the earlier, error-free version.
18 | 19 | This is the same for coding. For instance, you - the developer - receive a notification from a user that your code has a bug. You know that this bug was not present in the last version, so you can easily work through your history to see what recent changes might have caused a specific error, narrowing down your debugging work to specific parts of the code. So, version control allows a group of developers/users to know exactly what version of the code they are using, what changes were made and when - facilitating reproducibility. Version control also fosters collaboration, making it easier for people to work together at the same time and to merge changes from different users. 20 | 21 | There are several version control systems (VCS) available. We won't get into detail here, but some of the most popular open-source systems include [git](https://git-scm.com/), [SVN](https://subversion.apache.org/), and [Mercurial](https://www.mercurial-scm.org/). It is important to note that while some repositories already have built-in version control, repositories and version control systems are different - *e.g.*, *git* is the *version control system*, while [GitHub](https://github.com) is a *hosting service* for **git** repositories. 22 | 23 | In [lesson 6](lesson6-contribution.md), we revisit version control, giving some concrete examples of how you can use it to contribute to new or existing open-source code. 24 | 25 | ## Testing 26 | 27 | In [Lesson 1](lesson1-introduction.md), we introduced the concept of code testing and its importance in software development. There are many types of testing that range from testing the smallest testable parts of a code to verifying if a code works as a whole under different scenarios. Since code testing in general can be a complicated and technically involved topic, we will not go into the details of each type of testing and refer you to external sources for further reading. Instead, we focus on benefits and difficulties of testing in general, how to measure test coverage, and what to expect from a "tested" code as an end-user.
28 | 29 | We recall that reproducibility in research software plays a critical role. In the context of testing, we can think of reproducibility as a test whose objective is to reproduce a specific output, *i.e.*, results obtained from a specific version of the code that has been published in a journal. This test should include all the required inputs (configuration files, input data, etc.) so users can easily run and get the same published results. 30 | 31 | More broadly, the main objective of code testing is to evaluate if a code is doing what it is supposed to do. It is important to recognize that testing a code comprehensively can be very difficult since we should test the code not only for generating expected outputs but also for failing when it should. For example, the code should fail when an unacceptable input is passed (*e.g.*, wrong type, out of range, edge cases, etc.) or when an implemented algorithm doesn't converge for the given set of inputs. Taking into account all these scenarios can be extremely difficult and in some cases impossible. Therefore, we should manage our expectations when taking the tests as a measure of a code's quality both as a developer (*e.g.*, realizing that the end-users might apply the code to scenarios that we don't anticipate) and as an end-user (*e.g.*, recognizing the difficulties associated with testing and, if possible, evaluating the accuracy of outputs independently). 32 | 33 | From a developer perspective, there are also secondary benefits for testing. Whenever you make a change to a part of your code, for example to improve its performance, having tests for that portion of the code ensures that the modified code does not change the output. Another scenario could be related to dependencies. For example, research software often depends on other software, therefore, if those dependencies release new versions, the tests help us evaluate if those new versions make any changes to outputs of our code.
34 | 35 | On the other hand, as an end-user, using a code that includes tests, gives us more confidence in the state of the code. Users can check the status of tests (pass/fail) when the developers make changes, or the code has been tested for the use-case of our interest. 36 | 37 | Now that we have a better understanding of the testing, we can discuss measuring its effectiveness. One of the ways that we can measure the testing is through percentage coverage. There are two levels of coverage: *test coverage* and *code coverage*. *Test coverage* refers to the coverage of different scenarios that the code would be used in while *code coverage* is the percentage of lines of code that tests cover. As we discussed previously, enumerating all the different scenarios the code could be used in can be very difficult, thus, it can be difficult to quantify *test coverage* both from a developer and end-user perspective. However, *code coverage* is just a simple percentage value: how many lines of code do the tests activate vs. not. It is important to note that a high *code coverage* does not necessarily mean that a code has good *test coverage* since testing different usage scenarios can not directly be translated to lines of code. 38 | 39 | ### Additional Resource 40 | 41 | - [IBM on Testing](https://www.ibm.com/topics/software-testing) 42 | - [Software Testing](https://www.softwaretestinghelp.com/types-of-software-testing/) 43 | - Martin, R. C. (2008). Clean code: A handbook of agile software craftsmanship. Prentice Hall. 44 | 45 | ## Responsibilities after Sharing 46 | 47 | After sharing software, there are certain steps that need to be taken in regard to maintenance of that code/software. 48 | 49 | First, you should know it is not a requirement for you to be a permanent maintainer forever, but it is your responsibility to let users know if you do or don't intend to maintain the software/code. 
You can do this in your documentation where you discuss the development status of the project. This helps a user know if it will continue to be supported in the future, and make choices about if they should base ongoing work off your project. You don't want someone to spend a huge amount of time using your work as a dependency and then have their project become unusable in the future. 50 | 51 | The reality is that a developer/researcher may not have the time or continued funding to keep up with a project. In this case, perhaps consider handing ownership of the software to another researcher/developer, involved user, or entity invested in its continued use. You can either approach potential parties you think may be interested in this; or you can make your license permissive enough to allow others to create their own copies and continue your work (see more on choosing a license in this module). Depending on the license you choose, the use of your project, and if you have significant interest, you may be able to commercialize your software/code to provide funding for continued maintenance and feature requests. There is also the potential to apply for continued funding from agencies both governmental and private if your open software is widely used. If you're a user of a software that is no longer maintained, consider contacting the owner/developer and volunteering either as a maintainer, or to take over ownership of the project (you'll be more likely to get a positive response if you leave that choice up to the current owner). 52 | 53 | If you receive requests for features and fixes, and you have indicated you intend to maintain the code, these should be responded to. Either tell the users that (a) you intend to perform their requested action or (b) you think that's out of scope of your project. 
Additionally, you can invite the requester to (a) contribute to the project and add that feature/fix themselves (which you can then approve and add into your project) or (b) fork (make a copy of) the project and create the feature/fix, notifying that you will not merge changes into your (main/original) copy. 54 | 55 | ## Summary 56 | 57 | Here we discuss how version control and testing can both be used to increase the reproducibility and trust a user can place in open software. These are tools that can be used whether your software is shared or not. We go over what responsibilities a developer/researcher has after sharing their code: namely to inform your potential users if you will be maintaining the software and if so, respond to requests for feature additions and bug fixes. We discuss options for allowing your code to undergo continued development even if you don't have time/motivation/funding to continue iteration and encourage users of code that is no longer maintained to explore these options themselves by reaching out to the original developers. Furthermore, we discuss how users can become involved in existing projects in our next lesson. 58 | -------------------------------------------------------------------------------- /open-software/notes.md: -------------------------------------------------------------------------------- 1 | - General points on structure 2 | - Break up each lesson into a separate markdown document 3 | - Learning objective(s) listed at the top of each lesson file 4 | - References at the end of each lesson file 5 | - Cross-referencing between lessons on different modules 6 | - : - 7 | - Single “files” directory for additional figures etc. 8 | - Summary rather than conclusions 9 | - Label each section of lesson 2.1, 2.2, etc. 
10 | - Contributor list, like FAIR cookbook, but then without roles and alphabetical 11 | -------------------------------------------------------------------------------- /open-tools-resources/CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | Flavio Azevedo 2 | FORRT & University of Cambridge 3 | 0000-0001-9000-8513 4 | https://github.com/flavioazevedo 5 | Flavio_Azevedo_ 6 | 7 | Tyson Swetnam 8 | University of Arizona 9 | 0000-0002-6639-7181 10 | tyson-swetnam 11 | tswetnam 12 | 13 | Batool Almarzouq 14 | OSCSA, KAIMRC, UoL 15 | 0000-0002-3905-2751 16 | BatoolMM 17 | batool664 18 | 19 | Saranjeet Kaur 20 | RSE Asia Association 21 | 0000-0002-7038-1457 22 | SaranjeetKaur 23 | qwertyquesting 24 | 25 | Melissa Black 26 | MetaDocencia 27 | 0000-0002-5406-2982 melibleq 28 | melissablck 29 | 30 | Rebecca Ringuette 31 | NASA Goddard Space Flight Center 32 | 0000-0003-0875-2023 33 | rebeccaringuette 34 | 35 | Elli Papadopoulou 36 | Athena Research Center / OpenAIRE 37 | 0000-0002-0893-8509 elpapado 38 | elli_lib 39 | -------------------------------------------------------------------------------- /open-tools-resources/README.md: -------------------------------------------------------------------------------- 1 | # Objectives Outline 2 | 3 | ## 5A. Explain why open science tools encourage responsible open science (e.g., using the FAIR and CARE principles) 4 | 5 | 6 | ## 5B. Identify Open Science communities and initiatives - within and across disciplines - and join a community of practice (CoP) of interest to you 7 | 8 | 9 | ## 5C. Provide examples of how open science is practiced in a research team 10 | 11 | 12 | ## 5D. Identify types of Open Science tools along with their purpose 13 | 14 | 15 | ## 5E. Match appropriate open science tools to specific objectives within the research workflow 16 | 17 | 18 | ## 5F. 
Describe 3-5 open science tools and how to use them in projects (e.g., for communication, sharing of results, and collaboration). 19 | -------------------------------------------------------------------------------- /open-tools-resources/files/placeholder.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open-tools-resources/lesson3-tools-for-reproducibility.md: -------------------------------------------------------------------------------- 1 | # Lesson 3: Open Science tools for reproducibility 2 | 3 | ## Outline: Open Science tools for reproducibility 4 | 5 | * What is reproducibility? 6 | * Computational notebooks 7 | * Jupyter 8 | * R Markdown 9 | * Quarto 10 | 11 | ## Introduction 12 | 13 | SEE CONTENT OF THIS LESSON AT [https://tyson-swetnam.github.io/TOPS-OC5-tools/lesson3.html](https://tyson-swetnam.github.io/TOPS-OC5-tools/lesson3.html) 14 | 15 | This lesson is the third of the OpenCore Open Science Tools and Resources Modules. In this lesson, we take a deep dive into a few available tools for (computational) reproducibility. First, we define reproducibility. Then, … 16 | 17 | ## What is reproducibility? 18 | 19 | **Reproducibility** - the [National Academies Report 2019](https://www.nationalacademies.org/our-work/reproducibility-and-replicability-in-science) defined reproducibility and replicability as: 20 | 21 | * **Reproducibility** means computational reproducibility—obtaining consistent computational results using the same input data, computational steps, methods, code, and conditions of analysis 22 | * **Replicability** means obtaining consistent results across studies aimed at answering the same scientific question, each of which has obtained its own data. 23 | 24 | In practice, reproducibility is taken further by an additional step.
The goal of reproducibility is not only reproducing the same result given by using the same steps, such as re-executing a notebook in a containerized environment, but also allowing a given user to copy the environment and build upon the new technology and result by editing the environment to apply to a similar problem (e.g., a shareable, copyable executable paper). This small additional step gives others the ability to directly build upon previous work and get more science out of the same amount of funding. 25 | 26 | ## Computational notebooks 27 | 28 | … 29 | 30 | ### Jupyter 31 | 32 | … 33 | 34 | ### R Markdown 35 | 36 | … 37 | 38 | ### Quarto 39 | 40 | … 41 | 42 | Note: 43 | 44 | As you might have noticed, a lot of Open science tools require intermediate to advanced skills in data and information literacy and coding, especially if handling coding - intensive research projects. One of the best ways to learn these skills is through engaging with the respective communities, which often provide training and mentoring. 45 | 46 | ## Self Assessment Questions: Reproducibility 47 | 48 | **Scenario 1:** You stumble upon a research paper published a few years ago which used LANDSAT data and techniques similar to a project idea you want to apply for another area of interest. When you read the methods section of the paper, you find they published their derived data set in an international data repository (Dryad), but their algorithm code to generate the processed data from LANDSAT Real-Time (raw) data are not provided, only the description of the technique which they used is given in their Methods section and the mathematical equations for calculating their new index are in the Supplementary Materials. 49 | 50 | **Question S1-1**: From the hypothetical Scenario above, when there is access to the raw data, results data, and some written methods are provided, does the research paper meet the definition of being “reproducible”? 
51 | 52 | **Answer S1-1**: No, the paper fails to provide a necessary level of detail to allow a different team, with a different experimental setup to obtain the same results exactly. The paper may support some aspects of “Replicability”, but only if someone is able to write their own code using the provided methods. With the same raw data product you could test your code and compare your results data to their results data. This would not be easy and is prohibitive. 53 | -------------------------------------------------------------------------------- /release-workflow.md: -------------------------------------------------------------------------------- 1 | # Workflow for Releasing Different Versions 2 | 3 | We release the latest versions of the main repository including the content from this repository on Zenodo (DOI for all versions: [10.5281/zenodo.7392118](https://doi.org/10.5281/zenodo.7392118)). 4 | In this document, we describe the process for making a release on Zenodo through GitHub. 5 | 6 | We don't describe how to connect a GitHub account/repository to a Zenodo account. 7 | To learn more about that, please read the details in [the GitHub documentation](https://docs.github.com/en/repositories/archiving-a-github-repository/referencing-and-citing-content). 
8 | 9 | ## Updating release information 10 | 11 | **This workflow should be followed locally on your computer (using a code/file editor and commandline/terminal).** 12 | 13 | - Update [`CITATION.cff`](https://github.com/opensciency/sprint-content/blob/main/CITATION.cff) file: 14 | - Update `version: 0.0.0` (currently in line 6), `date-released: "2023-02-15"` (currently in line 12) 15 | - Validate the file locally on your computer using `cffconvert` (here we assume that the user knows how to use their terminal) 16 | - Step 1: Please install cffconvert using the following command: `python3 -m pip install --user cffconvert` (details here: https://pypi.org/project/cffconvert/) 17 | - Step 2: Run the following command: `cffconvert --validate` 18 | - Step 3: Once validated, create `.zenodo.json` file using the following command: `cffconvert --format zenodo > .zenodo.json` 19 | - `git add` -> `git commit` -> `git push` changes to the GitHub repository 20 | 21 | Please note that we have currently listed all individual names alphabetically in the CITATION file manually. 22 | If between the previous and current release, new contributors have joined your project, please add their details in the CITATION.cff file. 23 | 24 | ## Drafting release on GitHub 25 | 26 | - Click on [the release option](https://github.com/opensciency/sprint-content/releases) on GitHub main repository 27 | - Draft a [new release](https://github.com/opensciency/sprint-content/releases/new) 28 | - Click 'Choose a tag' 29 | - provide a new version name (such as v0.0.0, v1.0.1, ...)
30 | - click the 'create a new tag on release' option 31 | - A note on when we consider a version to be [major, minor or patch](https://semver.org/): 32 | - patch: Small additions to chapters are patch such as bug fixing, editing or minor contributions between the Book Dashes 33 | - minor: Significant number of the new content & new chapters such as during a Book Dash 34 | - major: Major changes such as major re-arranging of chapters into different guides or addition of a new guide 35 | - Create the release title: **'Opensciency - A core open science curriculum by and for the research community'** 36 | - Add details similar to what we have provided below that summarises what changes are in this version and provide a short sentence under the release log: 37 | 38 | ``` 39 | Opensciency is core open science curriculum material, drafted to introduce those beginning their open science journey to important definitions, tools, and resources; and provide for participants at all levels recommended practices. The material is made available under a [CC-BY 4.0 International](https://creativecommons.org/licenses/by/4.0/) license and is structured into five modules: 40 | 41 | - Ethos of Open Science 42 | - Open Tools and Resources 43 | - Open Data 44 | - Open Software 45 | - Open Results 46 | 47 | ### Citation 48 | 49 | **The latest release version can be found at this DOI [10.5281/zenodo.7392118](https://doi.org/10.5281/zenodo.7392118)** 50 | 51 | To credit and cite the material, use the following citation: 52 | OpenSciency Contributors (2022, December 2). Opensciency - A core open science curriculum by and for the research community. Zenodo. https://doi.org/10.5281/zenodo.7392119 53 | 54 | Shared under the CC-BY 4.0 License, all materials remain open for anyone to build open science curriculums or reuse for other purposes. Please include all author names where possible from the GitHub README contributors table. 
55 | 56 | We encourage the wider community to reuse the material, and we are especially interested in creative approaches to displaying the material. An example we like is [Elements of AI](https://course.elementsofai.com/). 57 | 58 | ### Details of the project 59 | 60 | Opensciency is a result of the work of more than 40 open science experts and practitioners from across the world and from different disciplines. The first draft of the curriculum material was developed from [June 27 - July 1, 2022](https://github.com/nasa/Transform-to-Open-Science/blob/main/docs/Area2_Capacity_Sharing/OpenCore/OpenCore_leads.md) as part of the Transform to Open Science (TOPS) [OpenCore](https://github.com/nasa/Transform-to-Open-Science/tree/main/docs/Area2_Capacity_Sharing/OpenCore) sprint. More information about the NASA TOPS initiative is available via their [website ](https://science.nasa.gov/open-science/transform-to-open-science). After the TOPS Community Panel on [October 6, 2022](https://github.com/nasa/Transform-to-Open-Science/blob/main/docs/Area1_Engagement/Community_Panels/20221005_community_panel.md), the original contributors created the Opensciency repository to allow all contributors to further engage with the curriculum and invite review on the initial draft material from the wider research community. 61 | 62 | Let us know if you have a creative approach to displaying and reusing the material by [submitting an issue](https://github.com/opensciency/sprint-content/issues). 63 | 64 | This work is licensed under a [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/). 
65 | 66 | Release log: 67 | v0.0.1: Peer-reviewed sprint document release 68 | v0.0.0: Post-sprint document release 69 | 70 | Full Changelog: v0.0.0...v0.0.1 (Previous release: v0.0.0...v0.0.1) 71 | 72 | ``` 73 | - Save draft 74 | 75 | ## Ready to Release 76 | 77 | - When you are ready 78 | - Merge the changes on GitHub made in the CITATION.cff and .zenodo.json files 79 | - Double-check the details in the drafted 'tags' and click 'Publish release' 80 | - After a few seconds, you can see a new version appear at [https://doi.org/10.5281/zenodo.7392118](https://doi.org/10.5281/zenodo.7392118) 81 | 82 | This workflow was written by Malvika Sharan, reusing the workflow originally written for [_The Turing Way_](https://github.com/alan-turing-institute/the-turing-way/blob/main/release-workflow.md). 83 | --------------------------------------------------------------------------------