├── .github └── workflows │ └── deploy_site.yml ├── .gitignore ├── .markdownlint.json ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── custom_theme └── img │ └── favicon.ico ├── docs ├── 01_introduction-and-goals.md ├── 02_architecture-constraints.md ├── 03_system-scope-and-context.md ├── 04_solution-strategy.md ├── 05_building-block-view.md ├── 06_runtime-view.md ├── 07_deployment-view.md ├── arc42.css ├── arc42.md ├── credits.md ├── glossary.md ├── img │ ├── 3.1-business-context.png │ ├── 5.2-whitebox-data-repos.png │ ├── 5.2-whitebox-meta-tool.png │ ├── 5.2-whitebox-repro-service.png │ ├── 5.2-whitebox-software-repos.png │ ├── 6.1-runtime-view-creation.png │ ├── 6.2-runtime-view-inspection.png │ ├── 7.1-deployment-view-testserver.png │ ├── 7.2-deployment-view-production-sketch.png │ ├── business-context.graphml │ ├── deployment-view-production-sketch.graphml │ ├── deployment-view-testserver.graphml │ ├── o2r-logo.png │ ├── runtime-view-creation.graphml │ ├── runtime-view-inspection.graphml │ ├── whitebox-data-repos.graphml │ ├── whitebox-meta-tool.graphml │ └── whitebox-repro-service.graphml ├── index.md ├── metadata.md ├── user-scenarios.md └── zenodo.md └── mkdocs.yml /.github/workflows/deploy_site.yml: -------------------------------------------------------------------------------- 1 | name: Deploy site 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | workflow_dispatch: 8 | 9 | jobs: 10 | render-pdf: 11 | runs-on: ubuntu-20.04 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v2 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.9 21 | 22 | # Action for collection of short branchname in commit message 23 | - name: Inject short variables 24 | uses: rlespinasse/github-slug-action@v3.x 25 | 26 | - name: Install python packages 27 | run: | 28 | sudo apt update 29 | wget --version 30 | pip install mkdocs 31 | pip install mkdocs-cinder 32 | pip install markdown-include 33 | sudo 
apt-get -y install xfonts-encodings libxrender-dev libfontconfig-dev libxext-dev 34 | wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb 35 | sudo apt -y install ./wkhtmltox_0.12.6-1.focal_amd64.deb 36 | wkhtmltopdf --version 37 | mkdocs --version 38 | 39 | - name: Build the documentation 40 | run: mkdocs build --clean 41 | 42 | - name: Replace current build version and date 43 | run: | 44 | CURRENT_VERSION=$(git log --pretty=format:'%h' -n 1) 45 | CURRENT_DATE=$(git show -s --format=%ci $CURRENT_VERSION) 46 | echo $CURRENT_VERSION "@" $CURRENT_DATE 47 | sed -i "s/@@VERSION@@/$CURRENT_VERSION/g" site/index.html 48 | sed -i "s/@@TIMESTAMP@@/$CURRENT_DATE/g" site/index.html 49 | 50 | - name: Make the pdf 51 | run: | 52 | VCS_REF=$(git rev-parse --short HEAD) 53 | CURRENT_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") 54 | FILE_NAME_PDF=$(echo o2r-architecture-$VCS_REF.pdf) 55 | # update version in cover page, if not already done (double quotes so the shell expands the variables) 56 | sed -i "s/@@VERSION@@/$VCS_REF/g" site/index.html 57 | sed -i "s/@@TIMESTAMP@@/$CURRENT_DATE/g" site/index.html 58 | # fix protocol relative URLs, see https://github.com/wkhtmltopdf/wkhtmltopdf/issues/2713 59 | find site/ -type f -name '*.html' | xargs sed -i 's|href="//|href="https://|g' 60 | find site/ -type f -name '*.html' | xargs sed -i 's|src="//|src="https://|g' 61 | # create PDF 62 | sudo wkhtmltopdf --margin-top 20mm --load-error-handling ignore --enable-local-file-access --no-background $( pwd)/site/index.html site/o2r-architecture.pdf 63 | 64 | - name: Deploy to Github pages 65 | uses: JamesIves/github-pages-deploy-action@4.1.3 66 | with: 67 | branch: gh-pages # The branch the action should deploy to. 68 | folder: site # The folder the action should deploy. 
69 | commit-message: Rebuild pages at ${{github.sha}} with branch ${{ env.GITHUB_REF_SLUG }} 70 | git-config-name: Build bot 71 | git-config-email: o2r.projekt@uni-muenster.de -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | site -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": true, 3 | "MD003": { "style": "atx" }, 4 | "MD007": { "indent": 2 }, 5 | "MD013": { "line_length": 300 }, 6 | "MD033": { "allowed_elements": ["div", "a"] } 7 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this project 2 | 3 | ## Writing style 4 | 5 | - Put each sentence on one line. 6 | 7 | ## Pull requests 8 | 9 | In your first pull request, please state you are contributing under the project license. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 
13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. 
moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. 
Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 
122 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: # delegate to the serve target 2 | $(MAKE) serve 3 | 4 | serve: 5 | mkdocs serve 6 | 7 | build: 8 | mkdocs build --clean 9 | 10 | VCS_REF := $(shell git rev-parse --short HEAD) 11 | CURRENT_DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ") 12 | FILE_NAME_PDF := $(shell echo o2r-architecture-${VCS_REF}.pdf) 13 | 14 | pdf: build 15 | # update version in cover page, if not already done 16 | sed -i 's/@@VERSION@@/${VCS_REF}/g' site/index.html 17 | sed -i 's/@@TIMESTAMP@@/${CURRENT_DATE}/g' site/index.html 18 | # fix protocol relative URLs, see https://github.com/wkhtmltopdf/wkhtmltopdf/issues/2713 19 | find site/ -type f -name '*.html' | xargs sed -i 's|href="//|href="https://|g' 20 | find site/ -type f -name '*.html' | xargs sed -i 's|src="//|src="https://|g' 21 | # create PDF 22 | wkhtmltopdf --margin-top 20mm --load-error-handling ignore --no-background $(shell pwd)/site/index.html site/o2r-architecture.pdf 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Opening Reproducible Research System Architecture 2 | 3 | Project website: [https://o2r.info](https://o2r.info) 4 | 5 | **Read architecture description online: [https://o2r.info/architecture](https://o2r.info/architecture)** 6 | 7 | ## The project 8 | 9 | Opening Reproducible Research (o2r) is a DFG-funded research project by Institute for Geoinformatics ([ifgi](http://www.uni-muenster.de/Geoinformatics/en/)) and University and Regional Library ([ULB](http://www.ulb.uni-muenster.de/)), University of Münster, Germany. Building on recent advances in mainstream IT, o2r envisions a new architecture for storing, executing and interacting with the original analysis environment alongside the corresponding research data and manuscript. 
This architecture revolves around so-called _Executable Research Compendia_ (ERC) as the container for research, review, and archival. 10 | 11 | ## Guidelines 12 | 13 | See [CONTRIBUTING.md](CONTRIBUTING.md) 14 | 15 | ## Build 16 | 17 | This specification is written in [Markdown](https://daringfireball.net/projects/markdown/), rendered with [MkDocs](http://www.mkdocs.org/) using a few [Python Markdown extensions](https://pythonhosted.org/Markdown/extensions/index.html), and deployed automatically using a GitHub Action. 18 | 19 | ![badge for workflow status](https://github.com/o2r-project/architecture/actions/workflows/deploy_site.yml/badge.svg) 20 | 21 | Use `mkdocs` to render it locally. 22 | 23 | ```bash 24 | # pip install mkdocs mkdocs-cinder 25 | mkdocs serve 26 | ``` 27 | 28 | ### Automated Builds 29 | The `deploy_site.yml` workflow will run the `mkdocs` command on every direct commit or merge on the master branch and deploy the rendered HTML documents to the `gh-pages` branch in this repository. 30 | 31 | The action authenticates its push to the `gh-pages` branch using the [checkout action](https://github.com/actions/checkout) and the credentials of the user [@o2r-user](https://github.com/o2r-user), who has write access to this repository. It is finalized through the [github pages deploy action](https://github.com/marketplace/actions/deploy-to-github-pages). 32 | 33 | ## Diagrams 34 | 35 | The diagrams are created with [yEd](https://www.yworks.com/products/yed). 36 | All source files (`.graphml`) are stored in `/docs/img`. 37 | The PNG renderings are exported from yEd with `transparent` background, a margin of `5`, and a scaling factor of `1.0`. 38 | 39 | ## License 40 | 41 | The o2r Executable Research Compendium specification is licensed under [Creative Commons CC0 1.0 Universal License](https://creativecommons.org/publicdomain/zero/1.0/), see file `LICENSE`. 
42 | To the extent possible under law, the people who associated CC0 with this work have waived all copyright and related or neighboring rights to this work. 43 | This work is published from: Germany. 44 | -------------------------------------------------------------------------------- /custom_theme/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/custom_theme/img/favicon.ico -------------------------------------------------------------------------------- /docs/01_introduction-and-goals.md: -------------------------------------------------------------------------------- 1 | ## 1. Introduction and Goals 2 | 3 | ### Preamble 4 | 5 | The packaging of research workflows is based on the concept of the **Executable Research Compendium** (ERC, see [specification](https://o2r.info/erc-spec) and [article](https://doi.org/10.1045/january2017-nuest)). 6 | The reproducibility service is defined by a [web **API** specification](https://o2r.info/api/) and demonstrated in a [**reference implementation**](https://github.com/o2r-project/reference-implementation). 7 | Both are published under permissive open licenses, as is this document. 8 | 9 | The normative specification is given in the [Markdown](https://en.wikipedia.org/wiki/Markdown) formatted files in the [project repository](https://github.com/o2r-project/architecture/), which form the basis for readable PDF and HTML versions of the architecture. 10 | A HTML and PDF version of this document are available at [https://o2r.info/architecture/](https://o2r.info/architecture/) and [https://o2r.info/architecture/o2r-architecture.pdf](https://o2r.info/architecture/o2r-architecture.pdf) respectively. 
11 | 12 | ### 1.1 Requirements Overview 13 | 14 | This architecture describes the relationship of a **reproducibility service** with other services from the context of scientific collaboration, publishing, and preservation. 15 | Together these services can be combined into a new system for transparent and reproducible scholarly publications. 16 | 17 | The reproducibility service must provide a reliable way to create and inspect packages of computational research to support reproducible publications. 18 | _Creation_ comprises uploading of a researcher's workspace with code, data, and documentation for building a reproducible runtime environment. 19 | This runtime environment forms the basis for _inspection_, i.e. discovering, examining details, and manipulating workflows on an online platform. 20 | 21 | ### 1.2 Quality Goals 22 | 23 | Transparency 24 | : The system must be transparent to allow a scrutiny demanded by a rigorous scientific process. 25 | All software components must be Free and Open Source Software ([FOSS](https://en.wikipedia.org/wiki/Free_and_open-source_software)). 26 | All text and specification must be available under a permissive [public copyright license](https://en.wikipedia.org/wiki/Public_copyright_license). 27 | 28 | Separation of concern 29 | : The system must integrate with existing services and focus on the core functionality: creating interactive reproducible runtime environments for scientific workflows. 30 | It must not replicate existing functionality such as storage or persistent identification. 31 | 32 | Flexibility & modularity 33 | : In regard to the research project setting, the system components must be well separated, so functions can be developed independently, e.g. using different programming languages. 34 | This allows different developers to contribute efficiently. 35 | It must be possible to provide various computational configurations required by specific ERC which are outside of the included runtime. 
36 | 37 | ### 1.3 Stakeholders 38 | 39 | Role/Name | Goal/point of contact | Required interaction 40 | --------- | ------- | ------------ 41 | Author (scientist) | publish ERC as part of a scientific publication process | - 42 | Reviewer (scientist) | examine ERC during a review process | - 43 | Co-author (scientist) | contribute to ERC during research (e.g. cloud based) | - 44 | Reader (scientist) | view and interact with ERC on a journal website | - 45 | Publisher | increase quality of publications in journals with ERC | - 46 | Curator/preservationist | ensure research is complete and archivable using ERC | - 47 | Operator | provide infrastructure to researchers at my university to collaborate and conduct high-quality research using ERC | - 48 | Developer | use and extend the tools around ERC | - 49 | 50 | Some of the stakeholders are accompanied by [user scenarios](user-scenarios.md) in prose. 51 | -------------------------------------------------------------------------------- /docs/02_architecture-constraints.md: -------------------------------------------------------------------------------- 1 | ## 2. Architecture constraints 2 | 3 | This section shows constraints on this project given by involved parties or conscious decisions made to ensure the longevity and transparency of the architecture and its implementations. 4 | If applicable, a motivation for constraints is given. (based on [biking2](https://biking.michael-simons.eu/docs/index.html#section-architecture-constraints)) 5 | 6 | ### 2.1 Technical constraints 7 | 8 |   | Constraint | Background and/or motivation 9 | ------ | ---------- | ---------------------------- 10 | TECH.1 | Only open licenses | All third party software or used data must be available under a suitable code license, i.e. either [OSI-approved](https://opensource.org/licenses) or [ODC license](https://opendatacommons.org/licenses). 
11 | TECH.2 | OS independent development and deployment | Server applications must run in well-defined [Docker](https://docker.com) containers to allow installation on any host system and to not limit developers to a specific language or environment. 12 | TECH.3 | Do not store secure information | The team members' experience and available resources do not allow for handling information with security concerns, so no critical data, such as user passwords but also data with privacy concerns, must be stored in the system. 13 | TECH.4 | Configurations for ERC runtimes | ERCs include the runtime environment in the form of a binary archive. The architecture must support executing this runtime environment and must be able to provide different configurations outside it, for example [computer architectures](/glossary#computer-architecture) or operating system [kernels](/glossary#kernel). The minimum requirements for the containerisation solution regarding architecture and kernel apply. 14 | 15 | ### 2.2 Organizational constraints 16 | 17 |   | Constraint | Background and/or motivation 18 | ------ | ---------- | ---------------------------- 19 | ORG.1 | Team and schedule | [https://o2r.info/about](https://o2r.info/about) 20 | ORG.2 | Do not interfere with existing well-established peer-review process | This software is _not_ going to change how scientific publishing works, nor should it. While intended to support public peer-reviews, open science etc., the software should be agnostic of these aspects. 21 | ORG.3 | Only open licenses | All created software must be available under an [OSI-approved](https://opensource.org/licenses) license, documentation and specification under a [CC license](https://creativecommons.org/licenses). 22 | ORG.4 | Version control/management | Code must be versioned using `git` and published on [GitHub](https://github.com/o2r-project). 
23 | ORG.5 | Acknowledge transfer from group domain to persistent domain | The ERC bundles artifacts coming from a private or group domain for a transfer to a public and persistent domain (cf. [Curation Domain Model](http://www.forschungsdaten.org/index.php/Curation_Domain_Model) (in German)), which imposes requirements on the incorporated metadata. 24 | 25 | ### 2.3 Conventions 26 | 27 |   | Constraint | Background and/or motivation 28 | ------ | ---------- | ---------------------------- 29 | CONV.1 | Provide formal architecture documentation | Based on [arc42](http://arc42.org/) (template version 7.0). 30 | CONV.2 | Follow coding conventions | Typical project layout and coding conventions of the respective used language should be followed as far as possible. However, we explicitly accept the research project context and do _not_ provide full tests suites or documentation beyond what is needed by project team members. 31 | CONV.3 | Documentation language is British English | International research project must be understandable by anyone interested; consistency increases readability. 32 | CONV.4 | Use subjectivisation for server component names | Server-side components are named using personalized verbs or (ideally) professions: _muncher_, _loader_, _transporter_. All git repositories for software use an `o2r-` prefix, in case of server-side components e.g. `o2r-shipper`. 33 | CONV.5 | Configuration using environment variables | Server-side components must be configurable using all caps environment variables prefixed with the component name, e.g. `SHIPPER_THE_SETTING`, for required settings. Other settings should be put in a settings file suitable for the used language, e.g. `config.js` or `config.yml`. 34 | -------------------------------------------------------------------------------- /docs/03_system-scope-and-context.md: -------------------------------------------------------------------------------- 1 | ## 3. 
System scope and context 2 | 3 | ### 3.1 Business context 4 | 5 | [![business context](img/3.1-business-context.png)](img/3.1-business-context.png) 6 | 7 | Communication partner | Exchanged data | Technology/protocol 8 | --------------------- | ------ | ------- 9 | **Reproducibility service**, e.g. [o2r reference implementation](https://o2r.info/results) | publication platforms utilize creation and examination services for ERC; reproducibility service uses different _supporting services_ to retrieve software artifacts, store runtime environment images, execute workflows, and save complete ERC | `HTTP` APIs | 10 | **Publishing platform**, e.g. online journal website or review system | users access ERC status and metadata via search results and paper landing pages; review process integrates ERC details and supports manipulation; | system's API using `HTTP` with `JSON` payload 11 | **Collaboration platform** | provide means to collaboratively work on data, code, or text; such platforms support both public and private (shared) digital workspaces | `HTTP` 12 | **ID provider** | retrieve unique user IDs, user metadata, and authentication tokens; user must log in with the provider | `HTTP` 13 | **Execution infrastructure** | ERC can be executed using a shared/distributed infrastructure | `HTTP` 14 | **Data repository** | the reproducibility service fetches (a) content for ERC creation, or (b) complete ERC, from different sources; it stores created ERC persistently at suitable repositories, which in turn may connect to long-term archives and preservation systems | `HTTP`, `FTP`, `WebDAV`, `git` 15 | **Registry (metadata)** | the reproducibility service can deliver metadata on published ERC to registries/catalogues/search portals directly and mediately via data repositories; the service can also retrieve/harvest contextual metadata during ERC creation to reduce required user inputs; users discover ERC via registries | (proprietary) `HTTP` APIs, persistent identifiers 
(`DOI`), `OAI-PMH` 16 | **Software repository** | software repositories provide software artifacts during ERC creation and store executable runtime environments | `HTTP` APIs 17 | **Archives and digital preservation systems** | saving ERCs in preservation systems includes extended data and metadata management (cf. private/group domain vs. persistent domain in the [Curation Domain Model](http://www.forschungsdaten.org/index.php/Curation_Domain_Model) (in German)), because a different kind of access and re-use is of concern for these systems; these concerns are relevant in so far as the intermediary _data repositories_ must be supported, but further aspects, e.g. long-term access rights, are only mediately relevant for the reproducibility service | metadata in `JSON` and `XML` provided as part of `HTTP` requests or as files within payloads 18 | 19 | ### 3.2 Technical context 20 | 21 | All components use `HTTP(S)` over cable network connections for communication (metadata documents, ERC, Linux containers, etc.). 22 | -------------------------------------------------------------------------------- /docs/04_solution-strategy.md: -------------------------------------------------------------------------------- 1 | ## 4. Solution strategy 2 | 3 | This section provides a short overview of architecture decisions and, for some, the reasoning behind them. 4 | 5 | ### Web API 6 | 7 | The developed solution is set in an existing system of services, and first and foremost must integrate well with these systems, focussing on the specific missing features of building and running ERCs. 8 | These features are provided via a _well-defined RESTful API_. 9 | 10 | ### Microservices 11 | 12 | To allow dynamic development and support the large variety of skills, all server-side features are developed in independent _[microservices](https://en.wikipedia.org/wiki/Microservices)_. 
13 | These microservices handle only specific functional parts of the API and allow independent development and deployment cycles. 14 | Core components are developed using server-side JavaScript based on [Node.js](https://nodejs.org/) with [Express](https://expressjs.com/) while other components are implemented in Python. 15 | 16 | We accept this diversification _increases complexity_ of both development and testing environments and the deployment of said services. 17 | 18 | Required documentation is minimal. 19 | The typical structure should follow common practices of the respective language and tools. 20 | 21 | ### Storage and intra-service communication 22 | 23 | In accordance with the system scope, there is no reliable storage solution implemented. 24 | The microservices simply share a common pointer to a local file system path. 25 | Storage of ERC is only implemented to make the solution independent during development and for the needs of core functionality (temporal storage), but it is not a feature the solution will eventually provide. 26 | 27 | The unifying component of the architecture is the _database_. 28 | It is known to all microservices. 29 | 30 | Some microservices communicate via an eventing mechanism for real-time updates, such as the search database and the component providing live updates to the user via WebSockets. 31 | The eventing is based on the operation log of the database (which is normally used to synchronise database nodes). 32 | This is a clear _misuse of an internal feature_, but a lot simpler than maintaining a full-blown eventing solution. 33 | 34 | ### Demonstration, user data & authentication 35 | 36 | To be able to demonstrate the system, a _browser-based client application_ is developed. 37 | It uses the RESTful API to control the system. 38 | _OAuth 2.0_ is used for authentication and minimal information, which is already public, is stored for each user. 
39 | This information is shared between all services which require authentication via the database. 40 | 41 | _The client application manages the control flow_ of all user interactions. 42 | 43 | ### Tools 44 | 45 | If standalone tools are developed, they provide a command-line interface (CLI). 46 | The CLI allows integration into microservices when needed, as well as packaging tools including their dependencies as containers and distributing them using a container registry. 47 | These _2nd level containers_ are started by the microservices and can run either next to the microservices or in an independent container cluster, providing scalability. 48 | It must only be ensured they are correctly configured in each microservice. 49 | The only required documentation is the installation into a container and usage of the CLI. 50 | -------------------------------------------------------------------------------- /docs/05_building-block-view.md: -------------------------------------------------------------------------------- 1 | ## 5. Building block view 2 | 3 | ### 5.1 Refinement Level 1 4 | 5 | #### 5.1.1 Blackbox Publication Platforms 6 | 7 | Publication platforms are the online interaction points of users with scientific works. 8 | Users create publications, e.g. submitting to a scientific journal, publishing on a pre-print server, publishing on a self-hosted website, or collaborating in online repositories. 9 | Users examine publications, e.g. browsing, searching, reading, downloading, or reviewing. 10 | 11 | #### 5.1.2 Blackbox ID Provider 12 | 13 | Identification information of distributed systems is crucial, and for security reasons as well as for limiting manual reproduction of metadata, a central service can provide all of 14 | 15 | - unique _identification of users_ and _metadata on users_, 16 | - _authentication_ of users, and 17 | - metadata on a user's _works_, e.g. publications or ERC. 
18 | 19 | Persistent identifiers for artifacts in the reproducibility service itself are _not required_, as these are provided by data storage and registries. 20 | However, services such as [ePIC](http://www.pidconsortium.eu/) could allow to retrieve persistent IDs. 21 | 22 | #### 5.1.3 Blackbox Execution Infrastructure 23 | 24 | The execution infrastructure provides CPU time and temporary result storage space for execution of ERC, both "as is" and with manipulation, i.e. changed parameters. 25 | It also provides different [architectures](/glossary#architecture) and [operating system kernel](/glossary#kernel) configurations which are outside of the scope of ERC's runtime environments based on containers. 26 | 27 | #### 5.1.4 Blackbox Data Repositories 28 | 29 | Data repositories are all services storing data but not software. 30 | More specifically, they may store software "as data", but not with software-specific features such as code versioning or installation binaries for different computer architectures. 31 | Data repositories may be self-hosted or public/free, domain-specific or generic. 32 | They typically provide persistent identifiers or handles, e.g. a [DOI](https://www.doi.org/) or [URN](https://en.wikipedia.org/wiki/Uniform_Resource_Name). 33 | They are used both for loading created ERC and for storing the ERC created by the reproducibility service. 34 | 35 | #### 5.1.5 Blackbox Registries 36 | 37 | Registries are metadata indexes or catalogues. 38 | 39 | They are recipients of metadata exports by the reproducibility service to share information about ERC, e.g. add a new ERC to an author's profile. 40 | This requires the reproducibility services to translate the internal metadata model into the recipients data model and encoding. 41 | 42 | They are sources of metadata during ERC creation when the information in the fetched content is used to query registries for additional information which can be offered to the user. 
43 | 44 | #### 5.1.6 Blackbox Software Repositories 45 | 46 | Software repositories are a source and a sink for software at different abstraction levels. 47 | They are a source for software dependencies, such as system packages for installing a library. 48 | They are a sink for executable images, which comprise a number of software artifacts and their dependencies, for a specific ERC instance. 49 | 50 | ### 5.2 Refinement Level 2 51 | 52 | #### 5.2.1 Whitebox Publication Platforms 53 | 54 | Publication platforms can be roughly divided into two groups. 55 | They can be either specific journals hosted independently, such as [JStatSoft](http://www.jstatsoft.org/) or [JOSS](http://joss.theoj.org/), or a larger platform provided by a publisher to multiple journals, such as [ScienceDirect](http://sciencedirect.com/), [MDPI](http://www.mdpi.com/), [SpringerLink](https://link.springer.com/), or [PLOS](https://plos.org/). 56 | To some extent, pre-print servers, for example [OSF](https://osf.io/) or [arXiv.org](https://arxiv.org/), can also fall into the latter category. 57 | 58 | Integration with the reproducibility service can happen via plug-ins to generic software, e.g. [OJS](https://pkp.sfu.ca/ojs/), or by bespoke extensions. 59 | Integrations are based on the service's public API. 60 | 61 | #### 5.2.2 Whitebox ID Provider 62 | 63 | The reproducibility service uses [ORCID](http://orcid.org/) to authenticate users and retrieve user metadata. 64 | The reproducibility service does not use the ORCID authorisation to edit ORCID user data or retrieve non-public data from ORCID, thus this process is [pseudo-authentication using OAuth](https://security.stackexchange.com/questions/44611/difference-between-oauth-openid-and-openid-connect-in-very-simple-term). 65 | Internally, the user's public `ORCID` is the main identifier. 66 | Users have different levels, which allow different actions, such as "registered user" or "administrator".
67 | These levels are stored in the reproducibility service. 68 | 69 | #### 5.2.3 Whitebox Execution Infrastructure 70 | 71 | Such an infrastructure could be either self-hosted, e.g. [Docker Swarm](https://www.docker.com/products/docker-swarm)-based, use a cloud service provider, such as [Amazon EC2](https://aws.amazon.com/ec2/), [Docker Cloud](http://cloud.docker.com/), or even use continuous integration services such as [Travis CI](https://travis-ci.org/) or [Gitlab CI](https://about.gitlab.com/gitlab-ci/). 72 | Or it could use a combination of these. 73 | 74 | Not all of these options provide the flexibility to provide configurations outside of containers, for example specific operating system kernels. 75 | An implementing system must manage these independently, for example by mapping ERC requirements like an operating system, to a part of the execution infrastructure that supports it. 76 | 77 | #### 5.2.4 Whitebox Data Repositories 78 | 79 | [![whitebox data repositories](img/5.2-whitebox-data-repos.png)](img/5.2-whitebox-data-repos.png) 80 | 81 | The reproducibility service _does not persistently store anything_. 82 | It only keeps copies of files during creation and inspection. 83 | So where are ERCs saved and where is their data coming from? 84 | 85 | **Collaboration platforms**, e.g. [ownCloud/Sciebo](http://sciebo.de/), [GitHub](http://github.com/), [ShareLatex](http://sharelatex.com/), [OSF](https://osf.io/), allow users to create, store, and share their research (code, text, data, et cetera). 86 | Besides being an interaction platform for users, they can also be seen simply as a data repository. 87 | The reproducibility service fetches contents for building an ERC from them based on public links, e.g. a public GitHub repository or shared Sciebo folder. 88 | It is possible to link ERC creation to a project/repository under development on a collaboration platform so as to trigger an ERC (re-)creation or execution when changes are made.
89 | 90 | Protocols: `WebDAV`, `ownCloud`, `HTTP` (including [webhooks](https://en.wikipedia.org/wiki/Webhook)), `git` 91 | 92 | **Domain data repositories**, e.g. [PANGAEA](https://www.pangaea.de/) or [GFZ Data Services](http://dataservices.gfz-potsdam.de/portal/), can be accessed by the reproducibility service during creation and execution of ERC to download data. 93 | Allowing access to data repositories reduces data duplication but requires control over/trust in the respective repository. 94 | 95 | Protocol: `HTTP` APIs 96 | 97 | Generic **Repositories**, e.g. [Zenodo](https://zenodo.org/), [Mendeley Data](https://data.mendeley.com/), [Figshare](http://figshare.com/), [OSF](https://osf.io/), provide (a) access to complete ERC stored in repositories for inspection and execution by the reproducibility service, and (b) storage of created ERC. 98 | 99 | Protocols: (authenticated) `HTTP` APIs 100 | 101 | **Archives** and digital preservation solutions can provide long-term preservation of ERC. 102 | The data repository and/or one of the involved platform providers are responsible for preservation. 103 | A data repository might save the hosted content to an archive, be regularly harvested by an archive, or be part of a distributed dark archive, e.g. [CLOCKSS](https://www.clockss.org). 104 | A platform provider might supply a digital preservation service, e.g. an installation of [Archivematica](https://www.archivematica.org/). 105 | 106 | Protocol: `HTTP` carrying bitstreams and metadata 107 | 108 | !!! Note "_Data Curation Continuum_" 109 | The Data Curation Continuum (cf. [diagram by Andrew Treloar](http://andrew.treloar.net/research/diagrams/data_curation_continuum.pdf)) describes how data moves from the private domain of a researcher to the public domain of data repositories over the course of conducting research. It describes the properties of data and important aspects of the transitions.
In a publishing process based on the reproducibility service, the full migration process is run through. 110 | 111 | #### 5.2.5 Whitebox Registries 112 | 113 | Research data registries and websites, for example [CRIS](https://www.uni-muenster.de/FB7_MultimediaSupport/CRIS_Infoseite/Forschungsdatenbank_Infoseite.html), [DataCite](https://www.datacite.org/), [Google Scholar](https://scholar.google.de/), [Scopus](https://www.scopus.com/), [Altmetric](https://www.altmetric.com/), to name just a few, collect metadata on publications and provide services with this data. 114 | Services comprise discovery but also derivation of citation data and creating networks of researchers and publications. 115 | 116 | The listed examples include open platforms, commercial solutions, and institution-specific platforms. 117 | Some of the registries offer a public, well-defined API to retrieve structured metadata and to create new records. 118 | 119 | Protocol: `HTTP` APIs 120 | 121 | #### 5.2.6 Whitebox Software Repositories 122 | 123 | [![whitebox software repositories](img/5.2-whitebox-software-repos.png)](img/5.2-whitebox-software-repos.png) 124 | 125 | ##### 5.2.6.1 Blackbox Package repositories 126 | 127 | Package repositories are used during ERC creation to download and install software artifacts for specific operating systems, e.g. [Debian APT](https://wiki.debian.org/Apt) or [Ubuntu Launchpad](https://launchpad.net/ubuntu), for specific programming languages or environments, e.g. [CRAN](https://cran.r-project.org/), or from source, e.g. [GitHub](https://github.com/). 128 | 129 | ##### 5.2.6.2 Blackbox Container registries 130 | 131 | Container registries such as [Docker Hub](https://hub.docker.com/), [Quay](https://quay.io/), self-hosted [Docker Registry 2.0](https://github.com/docker/distribution) or [Amazon ECR](https://aws.amazon.com/de/ecr/), store executable images of runtime environments.
132 | They can be used to distribute the runtime environments across the execution infrastructure and provide an intermediate ephemeral storage for the reproducibility service. 133 | 134 | #### 5.2.7 Whitebox Reproducibility Service 135 | 136 | [![whitebox reproducibility service](img/5.2-whitebox-repro-service.png)](img/5.2-whitebox-repro-service.png) 137 | 138 | ##### 5.2.7.1 Blackbox Webserver 139 | 140 | A webserver handles all incoming calls to the API (`/api/v1/`) via `HTTPS` (`HTTP` is redirected) and distributes them to the respective microservice. 141 | A working [nginx](https://nginx.org) configuration is available [in the test setup](https://github.com/o2r-project/o2r-platform/blob/master/dev/nginx.conf). 142 | 143 | ##### 5.2.7.2 Blackbox UI 144 | 145 | The UI is a web application based on [Angular JS](https://angularjs.org/), see [o2r-platform](https://github.com/o2r-project/o2r-platform). 146 | It connects to all microservices via their API and is served using the same webserver as the API. 147 | 148 | ##### 5.2.7.3 Blackbox Microservices 149 | 150 | The reproducibility service uses a [microservice architecture](https://en.wikipedia.org/wiki/Microservices) to separate functionality defined by the **[web API specification](https://o2r.info/api)** into manageable units. 151 | 152 | This allows scalability (selected microservices can be deployed as much as needed) and technology independence for each use case and developer. 153 | The microservices all access one main database and a shared file storage. 154 | 155 | ##### 5.2.7.4 Blackbox Tools 156 | 157 | Some functionality is developed as standalone tools and used as such in the microservices instead of re-implementing features. 158 | These tools are integrated via their command line interface (CLI) and executed as _2nd level containers_ by microservices. 159 | 160 | ##### 5.2.7.5 Blackbox Databases 161 | 162 | The _main document database_ is the unifying element of the microservice architecture. 
163 | All information shared between microservices or transactions between microservices are made via the database, including session state handling for authentication. 164 | 165 | A _search database_ is used for full-text search and advanced queries. 166 | 167 | The database's operation log, normally used for synchronization between database nodes, is also used for 168 | 169 | - event-driven communication between microservices, and 170 | - synchronization between main document database and search index. 171 | 172 | !!! Note 173 | This eventing "hack" is expected to be replaced by a proper eventing layer for productive deployments. 174 | 175 | ##### 5.2.7.6 Blackbox Ephemeral file storage 176 | 177 | After loading from external sources and during creation of ERC, the files are stored in a file storage shared between the microservices. 178 | The file structure is known to each microservice and read/write operations happen as needed. 179 | 180 | ### 5.3 Refinement Level 3 181 | 182 | #### 5.3.1 Whitebox microservices 183 | 184 | Each microservice is encapsulated as a [Docker](http://docker.com/) container running at its own port on an internal network and only serving its respective API path. 185 | Internal communication between the webserver and the microservices is unencrypted, i.e. `HTTP`. 186 | 187 | **Testing**: the [reference implementation](https://github.com/o2r-project/reference-implementation) provides instructions on running a local instance of the microservices and the demonstration UI. 188 | 189 | **Development**: the [o2r-platform](https://github.com/o2r-project/o2r-platform) GitHub project contains [docker-compose](https://docs.docker.com/compose/compose-file/) configurations to run all microservices, see repository file `docker-compose.yml` and the project's `README.md` for instructions. 190 | 191 | The following table describes the microservices, their endpoints, and their features.
192 | 193 | **Project** | **API path** | **Language** | **Description** 194 | ------ | ------ | ------ | ------ 195 | [muncher](https://github.com/o2r-project/o2r-muncher) | `/api/v1/compendium` and `/api/v1/job` | JavaScript (Node.js) | core component for [CRUD](https://en.wikipedia.org/wiki/Create,_read,_update_and_delete) of compendia and jobs (ERC execution) 196 | [loader](https://github.com/o2r-project/o2r-loader) | `/api/v1/compendium` (`HTTP POST` only) | JavaScript (Node.js) | load workspaces from repositories and collaboration platforms 197 | [finder](https://github.com/o2r-project/o2r-finder) | `/api/v1/search` | JavaScript (Node.js) | discovery and search, synchronizes the database with a search database (Elasticsearch) and exposes read-only search endpoints 198 | [transporter](https://github.com/o2r-project/o2r-transporter) | `~ /data/` and `~* \.(zip|tar|tar.gz)` | JavaScript (Node.js) | downloads of compendia in zip or (gzipped) tar formats 199 | [informer](https://github.com/o2r-project/o2r-informer) | `~* \.io` | JavaScript (Node.js) | [socket.io](http://socket.io/)-based WebSockets for live updates to the UI based on database event log, e.g. job progress 200 | [inspecter](https://github.com/o2r-project/o2r-inspecter) | `/api/v1/inspection` | R ([plumber](https://www.rplumber.io/)) | allow inspection of non-text-based file formats, e.g. 
`.Rdata` 201 | [substituter](https://github.com/o2r-project/o2r-substituter) | `/api/v1/substitution` | JavaScript (Node.js) | create new ERCs based on existing ones by substituting files 202 | [manipulater](https://github.com/o2r-project/o2r-manipulater/) | `under development` | -- | provide back-end containers for interactive ERCs 203 | 204 | ##### ERC exporting 205 | 206 | **Project** | **API path** | **Language** | **Description** 207 | ------ | ------ | ------ | ------ 208 | [shipper](https://github.com/o2r-project/o2r-shipper) | `/api/v1/shipment` | Python | ship ERCs, including packaging, and their metadata to third party repositories and archives 209 | 210 | ##### Authentication 211 | 212 | **Project** | **API path** | **Language** | **Description** 213 | ------ | ------ | ------ | ------ 214 | [bouncer](https://github.com/o2r-project/o2r-bouncer) | `/api/v1/auth`, `/api/v1/user/` | JavaScript (Node.js) | authentication service and user management (whoami, level changing) 215 | 216 | ##### Supporting services 217 | 218 | Existing software projects can be re-used for common functionality, such as gathering statistics. 219 | These supporting services run alongside the microservices in their own containers accessible via the main webservice. 220 | 221 | **Project** | **Description** 222 | ------ | ------ 223 | [Piwik](https://matomo.org/) | collect user statistics 224 | 225 | #### 5.3.2 Whitebox database 226 | 227 | Two databases are used. 228 | 229 | **[MongoDB](https://www.mongodb.com/) document database** with enabled [replica-set oplog](https://docs.mongodb.com/manual/core/replica-set-oplog/) for eventing. 230 | 231 | Collections: 232 | 233 | - `users` 234 | - `sessions` 235 | - `compendia` 236 | - `jobs` 237 | - `shipments` 238 | 239 | The MongoDB API is used by connecting microservices via suitable client packages, which are available for all required languages. 
240 | 241 | **[Elasticsearch](https://elastic.co) search index**, kept in sync with the main document database by the microservice `finder`. 242 | The ids are mapped to support update and delete operations. 243 | 244 | The two main resources of the API are kept in separate indices due to [their different structure/mappings](https://www.elastic.co/blog/index-vs-type): 245 | 246 | - `compendia` with type `compendia` 247 | - `jobs` with type `jobs` 248 | 249 | The search index is accessed by clients through the search endpoint provided by `finder`. 250 | 251 | #### 5.3.3 Whitebox tools 252 | 253 | **project** | **language** | **description** 254 | ------ | ------ | ------ 255 | [meta](https://github.com/o2r-project/o2r-meta) | Python | scripts for extraction, translation and validation of metadata; for details see [metadata documentation](/metadata) 256 | [containerit](https://github.com/o2r-project/containerit) | R | generation of Dockerfiles based on R sessions and scripts 257 | 258 | Each tool's code repository includes one or more `Dockerfiles`, which are automatically built and published on Docker Hub. 259 | The microservices use the tool's Docker images to execute the tools instead of installing all their dependencies into the microservices. 260 | The advantages are a controlled environment for the tool usage, independent development cycles and updating of the tools, smaller independent images for the microservices, and scalability. 261 | 262 | #### Meta 263 | 264 | Meta provides a CLI for each step of the metadata processing required in the reproducibility service as shown by the following diagram. 265 | After each step the created metadata is saved as a file per model to a directory in the compendium. 266 | A detailed view of the meta tool usage in the creation process is provided in the runtime view [ERC Creation](#61-erc-creation).
267 | 268 | [![whitebox meta tool](img/5.2-whitebox-meta-tool.png)](img/5.2-whitebox-meta-tool.png) 269 | 270 | #### Containerit 271 | 272 | The containerit tool extracts required dependencies from ERC main documents and uses the information and external configuration to create a Dockerfile, which executes the full computational workflow when the container is started. 273 | Its main strategy is to analyse the session at the end of executing the full workflow. 274 | 275 | #### 5.3.4 Whitebox ephemeral file storage 276 | 277 | A host directory is mounted into every container to the location `/tmp/o2r`. 278 | -------------------------------------------------------------------------------- /docs/06_runtime-view.md: -------------------------------------------------------------------------------- 1 | ## 6. Runtime view 2 | 3 | The runtime view describes the interaction between the static building blocks. 4 | It cannot cover all potential cases and focusses on the following main scenarios. 5 | 6 | **Scenario** | **Purpose and overview** 7 | ------ | ------ 8 | ERC Creation | The most important workflow for an author is creating an ERC from his workspace of data, code and documentation. The author can provide these resources as a direct upload, but a more comfortable process is loading the files from a collaboration platform. Three microservices are core to this scenario: `loader`, `muncher`, and `shipper`. 9 | ERC Inspection | The most important workflow for a reviewer or reader is executing the analysis encapsulated in an ERC. The execution comprises creation of configuration files (if missing) from metadata, compiling the display file using the actual analysis, and saving the used runtime environment. The core microservice for this scenario is `muncher`.
10 | 11 | ### 6.1 ERC Creation 12 | 13 | [![runtime view ERC creation](img/6.1-runtime-view-creation.png)](img/6.1-runtime-view-creation.png) 14 | 15 | First, the user initiates a _creation_ of a new ERC based on a workspace containing at least a viewable file (e.g. an HTML document or a plot) based on the code and instructions provided in either a script or a [literate programming document](/glossary#literate-programming), and any other data. 16 | The [`loader`](#531-whitebox-microservices) runs a series of steps: fetching the files, checking the incoming workspace structure, extracting raw metadata from the workspace, brokering raw metadata to o2r metadata, and saving the compendium to the database. 17 | The compendium is now a non-public _candidate_, meaning only the uploading user or admin users can see and edit it. 18 | All metadata processing is based on the tool [`meta`](#533-whitebox-tools). 19 | 20 | Then the user opens the candidate compendium, reviews and completes the metadata, and saves it. 21 | Saving triggers a metadata validation in [`muncher`](#531-whitebox-microservices). 22 | If the validation succeeds, the metadata is brokered to several output formats as files within the compendium using [`meta`](#533-whitebox-tools), and then re-loaded to the database for better [searchability](#532-whitebox-database). 23 | 24 | Next, the user must start a _job_ to add the ERC configuration and runtime environment to the workspace, which are core elements of an ERC. 25 | The ERC configuration is a file generated from the user-provided metadata (see [ERC specification](https://o2r.info/erc-spec/spec/#erc-configuration-file)). 26 | The runtime environment consists of two parts: (a) the runtime manifest, which is created by executing the workflow once in a container based on the tool [`containerit`](#533-whitebox-tools); and (b) the runtime image, which is built from the runtime manifest.
27 | A user may provide the ERC configuration file and the runtime manifest with the workspace for fine-grained control; the generation steps are skipped then. 28 | 29 | Finally the user starts a shipment of the compendium to a data repository. 30 | The [`shipper`](#531-whitebox-microservices) manages this two-step process. 31 | The separate "create" and "publish" steps allow checking the shipped files and avoid unintentional shipments, because a published shipment creates a non-erasable public resource. 32 | 33 | !!! Note "_In the code_" 34 | The `loader` has two core controllers for direct _upload_ and _load_ from a collaboration platform. 35 | Their core chain of functions are realised as [JavaScript Promises](/glossary#javascript-promises), see the code for [loader](https://github.com/o2r-project/o2r-loader/blob/master/lib/loader.js#L48) and [uploader](https://github.com/o2r-project/o2r-loader/blob/master/lib/uploader.js#L44) respectively. 36 | The respective steps are shared between these two cases where possible, i.e. starting with the step `stripSingleBasedir`. 37 | 38 | ### 6.2 ERC Inspection 39 | 40 | [![runtime view ERC inspection](img/6.2-runtime-view-inspection.png)](img/6.2-runtime-view-inspection.png) 41 | 42 | The user initiates an _inspection_ of an existing ERC by providing a reference such as [DOI](/glossary#doi) or URL. 43 | [`loader`](#531-whitebox-microservices) retrieves the compendium files, saves them locally and loads the contained metadata. 44 | Then the user can start a new _job_ for the compendium. 45 | [`muncher`](#531-whitebox-microservices) checks the request, creates a new job in the database and returns the job ID. 46 | The user's client can use the ID to connect to the live logs provided by [`informer`](#531-whitebox-microservices). 47 | All following steps by muncher regularly update the database, whose change events `informer` uses to continuously update client via WebSockets.
48 | 49 | The job starts with creating a copy of the compendium's files for the job. 50 | A [copy-on-write filesystem](https://en.wikipedia.org/wiki/Copy-on-write) is advantageous for this step. 51 | Then the archived runtime image is loaded from the file in the compendium into a runtime repository. 52 | This repository may be remote (either public or private, e.g. based on [Docker Registry](https://github.com/docker/distribution), [ECR](https://aws.amazon.com/ecr/) or [GitLab](https://docs.gitlab.com/ce/user/project/container_registry.html)) or simply the local image storage. 53 | Then all files except the runtime image archive are packed so they can be sent to a container runtime. 54 | The container runtime can be local (e.g. the Docker daemon), or a container orchestration such as [Kubernetes](https://en.wikipedia.org/wiki/Kubernetes). 55 | It provides log updates as a stream to `muncher`, which updates the database, whose changes trigger updates of the user interface via `informer`. 56 | When the container is finished, `muncher` compares the created outputs with the ones provided in the compendium and provides the result to the user. 57 | 58 | !!! Note "_In the code_" 59 | The `muncher` has two core resources: a _compendium_ represents an ERC, a _job_ represents a _"run"_ of an ERC, i.e. the building, running, and saving of the runtime environment including execution of the contained workflow. 60 | The core function for this is the `Executor`, which chains a number of steps using [JavaScript Promises](/glossary#javascript-promises), see the [code](https://github.com/o2r-project/o2r-muncher/blob/master/lib/executor.js#L1306). 61 | The check uses the tool [`erc-checker`](https://github.com/o2r-project/erc-checker). 62 | -------------------------------------------------------------------------------- /docs/07_deployment-view.md: -------------------------------------------------------------------------------- 1 | ## 7.
Deployment View 2 | 3 | ### 7.1 Test server [https://o2r.uni-muenster.de](https://o2r.uni-muenster.de) 4 | 5 | [![deployment view test server](img/7.1-deployment-view-testserver.png)](img/7.1-deployment-view-testserver.png) 6 | 7 | Motivation 8 | 9 | : The o2r infrastructure is driven by the research community's need for user friendly and transparent but also scalable and reliable solutions to increase computational reproducibility in the scientific publication process. To retrieve feedback from the community (public demo) and to increase software quality (controlled non-development environment), the current development state is regularly published on a test server. 10 | 11 | Quality and/or Performance Features 12 | 13 | : The server is managed completely with [Ansible](https://www.ansible.com/) to ensure a well-documented setup. The base operating system is CentOS Linux 7. The machine has 4 cores, 8 GB RAM, and a local storage ~100 GB, and runs on a VM host. The one machine in this deployment runs the full o2r reproducibility service, i.e. all microservices and a webserver to serve the user interfaces. It also runs the databases and ancillary services, such as a web traffic statistics service. When executing a compendium, the compendium workspace is packaged in a tarball and sent to the Docker daemon. This allows easy switching to remote machines, but also has a performance disadvantage. 14 | 15 | Mapping of Building Blocks to Infrastructure 16 | 17 | : All building blocks run in their own Docker container using an image provided via and built on [Docker Hub](https://hub.docker.com/r/o2rproject/) using a `Dockerfile` included in [each microservice's code repository](https://github.com/search?q=topic%3Amicroservice+org%3Ao2r-project+fork%3Atrue). The server is managed by the o2r team; external building blocks are managed by the respective organisation/provider.
18 | 19 | ### 7.2 Production (sketch) 20 | 21 | [![deployment view test server](img/7.2-deployment-view-production-sketch.png)](img/7.2-deployment-view-production-sketch.png) 22 | 23 | !!! Note 24 | This deployment view is a sketch for a potential productive deployment and intends to point out features of the chosen architecture and expected challenges or solutions. 25 | _It is not implemented at the moment!_ 26 | 27 | Motivation 28 | 29 | : A productive system must be reliable and scalable providing a single reproducibility service API endpoint. It must also adopt the distribution and deployments of the reproducibility service's microservices. Being based on containers it naturally uses one of the powerful orchestration engines, such as [Docker Swarm](https://docs.docker.com/engine/swarm) or [Kubernetes](http://kubernetes.io/). It can also include multiple execution infrastructures to support multiple container software versions, different architectures, kernels, GPUs, or even specialised hardware. Operators of a reproducibility service can separate themselves from other operators by offering specific hardware or versions. 30 | 31 | Quality and/or Performance Features 32 | 33 | : The services are redundantly provided via separated clusters of nodes for (a) running the reproducibility service's microservices and ancillary services, (b) running the document and search databases, (c) running ERC executions. Separating the clusters allows common security protocols, e.g. the tool and execution cluster should not be able to contact arbitrary websites. The software in the data cluster can run in containers or bare metal. The clusters for app and compendia have access to a common shared file storage, a potential bottleneck. Performance of microservices can be easily scaled by adding nodes to the respective clusters. The diversity of supported ERCs can be increased by providing different architectures and kernels, and hardware. 
Some requirements could be met on demand using virtualisation, such as a specific operating system version. 34 | 35 | Mapping of Building Blocks to Infrastructure 36 | 37 | : The o2r reproducibility service and execution infrastructures are managed by the o2r team similar to the test server. The other big building blocks, like publishing platforms or data repositories, are managed by the respective organisations. 38 | -------------------------------------------------------------------------------- /docs/arc42.css: -------------------------------------------------------------------------------- 1 | /* readthedocs */ 2 | .wy-side-nav-search { 3 | background-color: #004286; 4 | } 5 | 6 | .wy-nav-side { 7 | background-color: #434553; 8 | } 9 | 10 | .wy-menu a { 11 | color: #ffffff; 12 | } 13 | 14 | .wy-menu-vertical a:hover { 15 | background:#004286; 16 | color: #ffffff; 17 | } 18 | 19 | .wy-side-nav-search { 20 | background-color: #004286; 21 | } 22 | 23 | .wy-nav-top { 24 | background-color: #004286; 25 | } 26 | 27 | .wy-menu a { 28 | color: #ffffff; 29 | } 30 | 31 | .wy-menu-vertical a:hover { 32 | background:#004286; 33 | } 34 | 35 | .subnav { 36 | margin-left: 1em; 37 | } 38 | 39 | .caption-text { 40 | font-weight: bolder; 41 | } 42 | 43 | .rst-content dl:not(.docutils) dt { 44 | display: inline-block; 45 | margin: 6px 0; 46 | margin-top: 6px; 47 | font-size: 90%; 48 | line-height: normal; 49 | background: #e7f2fa; 50 | color: #004286; 51 | border-top: solid 2px #004286; 52 | } 53 | 54 | /* cinder */ 55 | .navbar-default { 56 | background-color: #004286; 57 | } 58 | 59 | footer small { 60 | font-size: 0.7em; 61 | } 62 | 63 | .navbar-brand { 64 | font-weight: 500; 65 | } 66 | 67 | .nav { 68 | font-weight: 500; 69 | } 70 | 71 | .alert { 72 | background-color: #fcf8e3; 73 | border: 1px solid #fbeed5; 74 | font-size: 1em; 75 | } 76 | 77 | .alert a { 78 | color: #008cba; 79 | text-decoration: none; 80 | } 81 | 82 | a:hover, a:focus { 83 | color: #00526e; 84 | 
text-decoration: underline; 85 | } 86 | 87 | .buildinfo { 88 | font-size: 0.7em; 89 | color: #343838; 90 | } 91 | 92 | div[role=main] img { 93 | max-width: 100%; 94 | } 95 | 96 | .rst-content .note .admonition-title { 97 | background: #004286; 98 | } 99 | -------------------------------------------------------------------------------- /docs/arc42.md: -------------------------------------------------------------------------------- 1 | ## About arc42 2 | 3 | arc42, the Template for documentation of software and system architecture. 4 | 5 | By Dr. Gernot Starke, Dr. Peter Hruschka and contributors. 6 | 7 | Template Revision: 7.0 EN (based on asciidoc), January 2017 8 | 9 | © We acknowledge that this document uses material from the arc 42 architecture template, <https://arc42.org>. Created by Dr. Peter Hruschka & Dr. Gernot Starke. -------------------------------------------------------------------------------- /docs/credits.md: -------------------------------------------------------------------------------- 1 | ------ 2 | 3 | ## Credits 4 | 5 | This specification and guides are developed by the members of the project Opening Reproducible Research ([Offene Reproduzierbare Forschung](https://www.uni-muenster.de/forschungaz/project/9520)) funded by the German Research Foundation (Deutsche Forschungsgemeinschaft (DFG) - Projektnummer [274927273](http://gepris.dfg.de/gepris/projekt/274927273)) under grant numbers PE 1632/10-1, KR 3930/3-1, and TR 864/6-1. 6 | 7 | > [![Opening Reproducible Research](img/o2r-logo.png)](https://o2r.info) 8 | > 9 | > _Opening Reproducible Research (o2r, [https://o2r.info/about](https://o2r.info/about)) is a DFG-funded research project by Institute for Geoinformatics ([ifgi](http://www.uni-muenster.de/Geoinformatics/)) and University and Regional Library ([ULB](https://www.ulb.uni-muenster.de/)), University of Münster, Germany. 
Building on recent advances in mainstream IT, o2r envisions a new architecture for storing, executing and interacting with the original analysis environment alongside the corresponding research data and manuscript. This architecture evolves around so-called Executable Research Compendia (ERC) as the container for both research, review, and archival._ 10 | 11 | To cite this specification please use 12 | 13 | > _Nüst, Daniel, 2018. Reproducibility Service for Executable Research Compendia: Technical Specifications and Reference Implementation. Zenodo. doi:[10.5281/zenodo.2203844](http://doi.org/10.5281/zenodo.2203844)_ 14 | 15 | For a complete list of publications, posters, presentations, and software projects from the o2r project please visit [https://o2r.info/results/](https://o2r.info/results/). 16 | 17 | ------ 18 | 19 | ## License 20 | 21 | ![CC-0 Button](https://licensebuttons.net/p/zero/1.0/88x31.png) 22 | 23 | The o2r architecture specification is licensed under [Creative Commons CC0 1.0 Universal License](https://creativecommons.org/publicdomain/zero/1.0/), see file [`LICENSE`](https://raw.githubusercontent.com/o2r-project/architecture/arc42/LICENSE). 24 | To the extent possible under law, the people who associated CC0 with this work have waived all copyright and related or neighboring rights to this work. 25 | This work is published from: Germany. 26 | 27 | ------ 28 | 29 | {!docs/arc42.md!} 30 | 31 | ------ 32 | 33 |
Build @@VERSION@@ @ @@TIMESTAMP@@
34 | -------------------------------------------------------------------------------- /docs/glossary.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | ## Architecture 4 | 5 | See [computer architecture](#computer-architecture). 6 | 7 | ## Computer Architecture 8 | 9 | > [C]omputer architecture is a set of rules and methods that describe the functionality, organization, and implementation of computer systems. Some definitions of architecture define it as describing the capabilities and programming model of a computer but not a particular implementation. In other definitions computer architecture involves instruction set architecture design, microarchitecture design, logic design, and implementation. 10 | > via [Wikipedia](https://en.wikipedia.org/wiki/Computer_architecture) 11 | 12 | Common architectures are `amd64` or `x86_64`. 13 | 14 | You can find out the operating system + architecture combinations supported by a specific Docker image, e.g. `golang`, with 15 | 16 | ```bash 17 | $ docker run mplatform/mquery golang 18 | Image: golang 19 | * Manifest List: Yes 20 | * Supported platforms: 21 | - linux/amd64 22 | - linux/arm/v7 23 | - linux/arm64/v8 24 | - linux/386 25 | - linux/ppc64le 26 | - linux/s390x 27 | - windows/amd64:10.0.14393.2068 28 | - windows/amd64:10.0.16299.248 29 | ``` 30 | 31 | ## CRUD 32 | 33 | Basic operations on a digital artefact are create, read, update, and delete, often abbreviated to "[CRUD](https://en.wikipedia.org/wiki/Create,_read,_update_and_delete)". 34 | 35 | ## Digital Object Identifier 36 | 37 | See [DOI](#doi). 
38 | 39 | ## DOI 40 | 41 | > _In computing, a Digital Object Identifier or DOI is a persistent identifier or handle used to uniquely identify objects [..]_ 42 | > _A DOI aims to be "resolvable", usually to some form of access to the information object to which the DOI refers._ 43 | > via [Wikipedia](https://en.wikipedia.org/wiki/Digital_object_identifier), see also [https://doi.org](https://doi.org) 44 | 45 | ## ERC 46 | 47 | Executable Research Compendium, see this [scientific article](https://doi.org/10.1045/january2017-nuest) for concepts and the [specification](https://o2r.info/erc-spec) for technical documentation. 48 | 49 | ## Executable Research Compendium 50 | 51 | See [ERC](#erc). 52 | 53 | ## JavaScript Promises 54 | 55 | > _A Promise is an object representing the eventual completion or failure of an asynchronous operation. [...] Essentially, a promise is a returned object to which you attach callbacks, instead of passing callbacks into a function._ 56 | > via [MDN web docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Using_promises) 57 | 58 | ## Kernel 59 | 60 | > _The kernel is a computer program that is the core of a computer's operating system, with complete control over everything in the system._ 61 | > via [Wikipedia](https://en.wikipedia.org/wiki/Kernel_(operating_system)) 62 | 63 | A common example is the [Linux kernel](https://en.wikipedia.org/wiki/Linux_kernel). 64 | 65 | ## Literate Programming 66 | 67 | > _Literate programming is a programming paradigm [..] 
in which a program is given as an explanation of the program logic in a natural language, such as English, interspersed with snippets of macros and traditional source code, from which a compilable source code can be generated._ 68 | > via [Wikipedia](https://en.wikipedia.org/wiki/Literate_programming) -------------------------------------------------------------------------------- /docs/img/3.1-business-context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/3.1-business-context.png -------------------------------------------------------------------------------- /docs/img/5.2-whitebox-data-repos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/5.2-whitebox-data-repos.png -------------------------------------------------------------------------------- /docs/img/5.2-whitebox-meta-tool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/5.2-whitebox-meta-tool.png -------------------------------------------------------------------------------- /docs/img/5.2-whitebox-repro-service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/5.2-whitebox-repro-service.png -------------------------------------------------------------------------------- /docs/img/5.2-whitebox-software-repos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/5.2-whitebox-software-repos.png 
-------------------------------------------------------------------------------- /docs/img/6.1-runtime-view-creation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/6.1-runtime-view-creation.png -------------------------------------------------------------------------------- /docs/img/6.2-runtime-view-inspection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/6.2-runtime-view-inspection.png -------------------------------------------------------------------------------- /docs/img/7.1-deployment-view-testserver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/7.1-deployment-view-testserver.png -------------------------------------------------------------------------------- /docs/img/7.2-deployment-view-production-sketch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/7.2-deployment-view-production-sketch.png -------------------------------------------------------------------------------- /docs/img/business-context.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | o2r Busines Context 24 | 25 | 26 | 27 | 28 | 29 | 30 | Interaction Platforms 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | Supporting Services 46 | 47 | 48 | 49 | 50 | 51 | 52 | Downstream Services 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 
| 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | Archives & 105 | Digital 106 | Preservation 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | Data 124 | Repositories 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | Execution 142 | Infrastructure 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | ID Provider 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | Publishing 177 | Platforms 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | Registries 195 | (Metadata) 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | Reproducibility 213 | Service 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | Software 232 | Repositories 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | Scientist 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | Collaboration 271 | Platforms 272 | (Data, Code) 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | download data from trusted repos 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | access runtime 323 | images 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | create ERC 343 | examine ERC 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 
354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | link to 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | execute ERC 392 | (build, inspect, 393 | manipulate) 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | fetch content 414 | 415 | save/load ERC 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | harvest & deliver MD 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | download software 450 | 451 | CRUD runtime images 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | check authentication 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | publish research 493 | review publications 494 | find publications 495 | read publications 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | discover ERC 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | do research 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | log in with 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | load data 574 | create ERC 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | authenticate 597 | user 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> 607 | <!-- Created with Inkscape (http://www.inkscape.org/) --> 608 | <svg 609 | 
xmlns:dc="http://purl.org/dc/elements/1.1/" 610 | xmlns:cc="http://web.resource.org/cc/" 611 | xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 612 | xmlns:svg="http://www.w3.org/2000/svg" 613 | xmlns="http://www.w3.org/2000/svg" 614 | xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" 615 | xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" 616 | width="41" 617 | height="68.997391" 618 | id="svg2" 619 | sodipodi:version="0.32" 620 | inkscape:version="0.45.1" 621 | sodipodi:docbase="C:\Daten\alberts\projects\yfx" 622 | sodipodi:docname="uml_actor.svg" 623 | inkscape:output_extension="org.inkscape.output.svg.inkscape" 624 | version="1.0"> 625 | <defs 626 | id="defs4" /> 627 | <sodipodi:namedview 628 | id="base" 629 | pagecolor="#ffffff" 630 | bordercolor="#666666" 631 | borderopacity="1.0" 632 | inkscape:pageopacity="0.0" 633 | inkscape:pageshadow="2" 634 | inkscape:zoom="2.934351" 635 | inkscape:cx="144.21983" 636 | inkscape:cy="28.533711" 637 | inkscape:document-units="px" 638 | inkscape:current-layer="layer1" 639 | showgrid="true" 640 | inkscape:window-width="1280" 641 | inkscape:window-height="968" 642 | inkscape:window-x="-4" 643 | inkscape:window-y="-4" 644 | width="48px" 645 | height="48px" 646 | showborder="false" 647 | inkscape:showpageshadow="false" /> 648 | <metadata 649 | id="metadata7"> 650 | <rdf:RDF> 651 | <cc:Work 652 | rdf:about=""> 653 | <dc:format>image/svg+xml</dc:format> 654 | <dc:type 655 | rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> 656 | </cc:Work> 657 | </rdf:RDF> 658 | </metadata> 659 | <g 660 | inkscape:label="Ebene 1" 661 | inkscape:groupmode="layer" 662 | id="layer1" 663 | transform="translate(-29.5,-42.959476)"> 664 | <a 665 | id="a3142" 666 | transform="matrix(1.0873906,0,0,1,-4.4741999,0)"> 667 | <path 668 | transform="translate(11.586889,5.2908993)" 669 | d="M 47.02914 47.36993 A 8.5197716 9.2013531 0 1 1 29.989597,47.36993 A 8.5197716 9.2013531 0 1 1 47.02914 47.36993 z" 670 | 
sodipodi:ry="9.2013531" 671 | sodipodi:rx="8.5197716" 672 | sodipodi:cy="47.36993" 673 | sodipodi:cx="38.509369" 674 | id="path2160" 675 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 676 | sodipodi:type="arc" /> 677 | </a> 678 | <path 679 | sodipodi:type="arc" 680 | style="fill:none" 681 | id="path3134" 682 | sodipodi:cx="43.962021" 683 | sodipodi:cy="48.392303" 684 | sodipodi:rx="3.7486994" 685 | sodipodi:ry="0" 686 | d="M 47.71072 48.392303 A 3.7486994 0 0 1 1 40.213321,48.392303 A 3.7486994 0 0 1 1 47.71072 48.392303 z" /> 687 | <path 688 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24319649px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 689 | d="M 50,61.33709 C 50,91.363211 50,92.247838 50,92.247838" 690 | id="path3136" /> 691 | <path 692 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 693 | d="M 69.760668,72.362183 C 69.760668,72.362183 69.760668,72.362183 50.239332,72.362183 C 30.239332,72.362183 30.239332,72.362183 30.239332,72.362183 L 30.239332,72.362183" 694 | id="path3138" /> 695 | <path 696 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 697 | d="M 30,111.45687 C 30,111.45687 30,111.45687 50,92.013532 C 70,111.45687 70,111.45687 70,111.45687" 698 | id="path3140" /> 699 | </g> 700 | </svg> 701 | 702 | 703 | 704 | 705 | -------------------------------------------------------------------------------- /docs/img/o2r-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/o2r-project/architecture/0d60d5e53ced02bd1588ee1c5eca5ddfc9951ccd/docs/img/o2r-logo.png -------------------------------------------------------------------------------- /docs/img/whitebox-data-repos.graphml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Data Repositories 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | Reproducibility 35 | Service 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | Domain Data 53 | Repositories 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | Generic 71 | Repositories 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | Execution 89 | Infrastructure 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | Archives & 107 | Digital 108 | Preservation 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | Collaboration 126 | Platforms 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | access data 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | fetch data, 164 | code, and text 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | trigger updates 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | Save ERC 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | preserve 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | backup 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | -------------------------------------------------------------------------------- /docs/img/whitebox-meta-tool.graphml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | User 24 | workspace 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | Raw Metadata 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | o2r Metadata 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 3rd party 76 | repository 77 | Metadata 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | extraction 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | brokering with 113 | o2r mapping 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | brokering with 132 | repository 133 | mapping 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | User Input 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/img/whitebox-repro-service.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | Reproducibility Service 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | Data 37 | Repositories 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | Clients 55 | (publishing platforms, 56 | collaboration platforms, 57 | registries, ...) 
58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Software 75 | Repositories 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | Scientist 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | Execution 114 | Infrastructure 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | Webserver (HTTPS) 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | UI 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | microservices 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | Tools 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | microservices 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | Microservices 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | Databases 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | Ephemeral 251 | file storage 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | ID Provider 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | execute ERC 289 | (build, inspect, 290 | manipulate) 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | fetch content 309 | 310 | save/load ERC 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | download software 332 | 333 | CRUD runtime images 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 
interact with 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | create, 370 | inspect, 371 | manipulate & 372 | substitute ERC 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | live updates 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | use 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | manage 508 | users, 509 | sessions, 510 | compendia, 511 | jobs 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | manage compendium and job files 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | authentication 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> 565 | <!-- Created with Inkscape (http://www.inkscape.org/) --> 566 | <svg 567 | xmlns:dc="http://purl.org/dc/elements/1.1/" 568 | xmlns:cc="http://web.resource.org/cc/" 569 | xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 570 | xmlns:svg="http://www.w3.org/2000/svg" 571 | xmlns="http://www.w3.org/2000/svg" 572 | xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" 573 | xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" 574 | width="41" 575 | height="68.997391" 
576 | id="svg2" 577 | sodipodi:version="0.32" 578 | inkscape:version="0.45.1" 579 | sodipodi:docbase="C:\Daten\alberts\projects\yfx" 580 | sodipodi:docname="uml_actor.svg" 581 | inkscape:output_extension="org.inkscape.output.svg.inkscape" 582 | version="1.0"> 583 | <defs 584 | id="defs4" /> 585 | <sodipodi:namedview 586 | id="base" 587 | pagecolor="#ffffff" 588 | bordercolor="#666666" 589 | borderopacity="1.0" 590 | inkscape:pageopacity="0.0" 591 | inkscape:pageshadow="2" 592 | inkscape:zoom="2.934351" 593 | inkscape:cx="144.21983" 594 | inkscape:cy="28.533711" 595 | inkscape:document-units="px" 596 | inkscape:current-layer="layer1" 597 | showgrid="true" 598 | inkscape:window-width="1280" 599 | inkscape:window-height="968" 600 | inkscape:window-x="-4" 601 | inkscape:window-y="-4" 602 | width="48px" 603 | height="48px" 604 | showborder="false" 605 | inkscape:showpageshadow="false" /> 606 | <metadata 607 | id="metadata7"> 608 | <rdf:RDF> 609 | <cc:Work 610 | rdf:about=""> 611 | <dc:format>image/svg+xml</dc:format> 612 | <dc:type 613 | rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> 614 | </cc:Work> 615 | </rdf:RDF> 616 | </metadata> 617 | <g 618 | inkscape:label="Ebene 1" 619 | inkscape:groupmode="layer" 620 | id="layer1" 621 | transform="translate(-29.5,-42.959476)"> 622 | <a 623 | id="a3142" 624 | transform="matrix(1.0873906,0,0,1,-4.4741999,0)"> 625 | <path 626 | transform="translate(11.586889,5.2908993)" 627 | d="M 47.02914 47.36993 A 8.5197716 9.2013531 0 1 1 29.989597,47.36993 A 8.5197716 9.2013531 0 1 1 47.02914 47.36993 z" 628 | sodipodi:ry="9.2013531" 629 | sodipodi:rx="8.5197716" 630 | sodipodi:cy="47.36993" 631 | sodipodi:cx="38.509369" 632 | id="path2160" 633 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 634 | sodipodi:type="arc" /> 635 | </a> 636 | <path 637 | sodipodi:type="arc" 638 | style="fill:none" 639 | id="path3134" 640 | sodipodi:cx="43.962021" 641 | 
sodipodi:cy="48.392303" 642 | sodipodi:rx="3.7486994" 643 | sodipodi:ry="0" 644 | d="M 47.71072 48.392303 A 3.7486994 0 0 1 1 40.213321,48.392303 A 3.7486994 0 0 1 1 47.71072 48.392303 z" /> 645 | <path 646 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24319649px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 647 | d="M 50,61.33709 C 50,91.363211 50,92.247838 50,92.247838" 648 | id="path3136" /> 649 | <path 650 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 651 | d="M 69.760668,72.362183 C 69.760668,72.362183 69.760668,72.362183 50.239332,72.362183 C 30.239332,72.362183 30.239332,72.362183 30.239332,72.362183 L 30.239332,72.362183" 652 | id="path3138" /> 653 | <path 654 | style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" 655 | d="M 30,111.45687 C 30,111.45687 30,111.45687 50,92.013532 C 70,111.45687 70,111.45687 70,111.45687" 656 | id="path3140" /> 657 | </g> 658 | </svg> 659 | 660 | 661 | 662 | 663 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Opening Reproducible Research System Architecture 2 | 3 | {!docs/01_introduction-and-goals.md!} 4 | 5 | {!docs/02_architecture-constraints.md!} 6 | 7 | {!docs/03_system-scope-and-context.md!} 8 | 9 | {!docs/04_solution-strategy.md!} 10 | 11 | {!docs/05_building-block-view.md!} 12 | 13 | {!docs/06_runtime-view.md!} 14 | 15 | {!docs/07_deployment-view.md!} 16 | 17 | {!docs/credits.md!} 18 | -------------------------------------------------------------------------------- /docs/metadata.md: -------------------------------------------------------------------------------- 1 | # Metadata workflows 2 | 3 | This document describes the internal processes handling metadata for ERC. 
4 | For information on the metadata model for ERC as part of the o2r web API please see [the API specification](https://o2r.info/api/compendium/metadata). 5 | 6 | The remainder of this document describes _who_ handles metadata _when_ and _how_ within the [o2r architecture](/). 7 | 8 | ## Files vs. database 9 | 10 | In all workflows, files are created within ERC in a specific subdirectory `.erc` holding different kinds, formats, or versions of metadata. 11 | For ease of access via web API, the information is also stored within the database. 12 | 13 | **The files in the compendium are always the normative source of information.** 14 | 15 | The term _brokering_ means the translation from schema-less to schema-specific metadata, as well as inter-schema mappings. 16 | The brokering output is then stored in respective files and mirrored to the database by the reproducibility service. 17 | 18 | ## Metadata extraction and brokering during creation 19 | 20 | `muncher` is the main [CRUD](https://en.wikipedia.org/wiki/Create,_read,_update_and_delete) component for compendia. 21 | It controls the creation workflow. 22 | 23 | The creation from the metadata perspective is as follows: 24 | 25 | 1. `init` stores the files for a new ERC in a directory. 26 | 1. `extract` uses `metaextract.py` ([docs](https://github.com/o2r-project/o2r-meta#2-metaextract)) to analyse the incoming ERC and creates new files with _raw_ metadata for each of the scanned files. Currently the following types of files will be considered: _.r, .rmd, netcdf, "bagit.txt"_. Future releases of the extractor will be likely to consider _.tex, .json (geojson), .jp2, .tiff_ and more. 27 | This raw metadata itself is _schema-less_ and non-semantic. 28 | The processed files are in conceptual competition for the best representative of the working directory's meta information, i.e. there will be only one main output, ideally represented by the most complete set of metadata. 
29 | By default the competing bits of information will also be preserved in `.erc/metadata_raw_<filename>.json` where _filename_ is an identifier based on the original source file. 30 | - output file: `.erc/metadata_raw.json` 31 | - database field: `.metadata.raw` 32 | 1. `broker` uses `metabroker.py` ([docs](https://github.com/o2r-project/o2r-meta#5-metabroker)) to translate the _raw_ metadata in `json` to _o2r_ metadata in `json` as being compliant to the o2r json-schema. 33 |  - output file: `.erc/metadata_o2r_X.json` (where `X` is the version number as set in the [o2r-map.json](https://github.com/o2r-project/o2r-meta/blob/master/broker/mappings/o2r-map.json) mapping file, e.g. _1_) 34 |  - database field: `.metadata.o2r` 35 | 1. (`harvest` TBD; will connect to third party database endpoint via OAI-PMH to gather additional information for the enrichment of the o2r metadata collected via extraction) 36 | 1. `save` stores the new ERC to the database including the aforementioned metadata fields. 37 | 1. `user check` provides an interactive form to the uploading user to control and edit the suggested metadata. 38 | Suggestions are based on _o2r_ metadata. 39 | The check workflow is handled in the web client project. 40 | 1. `update` updates the metadata in both database and file with the user's edits. 41 | This step creates _valid o2r_ metadata. 42 | The metadata update includes _all brokering_ to the configured metadata formats, meaning the brokered metadata is always up-to-date and based on the same source, the _o2r_ metadata. 43 | 44 | By design there is no metadata brokering during shipments or job executions. 45 | Because it is likely that not all information can be brokered automatically, the metadata required by shipping destinations are mandatory in the o2r metadata model to reduce the user involvement to a minimum, i.e. when updating the metadata.
46 | In the same vein, all _validation_ takes place during metadata updates, because that is the only time a user can react to validation errors. 47 | 48 | ## Metadata for shipments 49 | 50 | The `shipper` uses the metadata stored in the ERC directory `.erc` to start a shipment of data or metadata to third-party repositories. 51 | It does not do any updating, brokering, or validation. 52 | 53 | ## Metadata mappings 54 | 55 | **destination** | **model** | **format(s)** | **description** 56 | ------ | ------ | ------ | ------ 57 | [//]: # (`datacite` | [DataCite Metadata Schema 4.1](http://schema.datacite.org/meta/kernel-4.1/) | `xml` | for metadata export) 58 | [//]: # (`datacite` | [DataCite Metadata Schema 3.1](http://schema.datacite.org/meta/kernel-3.1/) | `xml` | (still in wide spread use for OAI-PMH)) 59 | [//]: # (`ORCID` (TBD) | [XML for orcid-works](https://members.orcid.org/api/xml-orcid-works) | `xml` | for adding ERC as works to an ORCID profile) 60 | [//]: # (`CRIS` (TBD) | (local adaptation of the [CERIF model](http://www.eurocris.org/cerif/main-features-cerif) | `xml` | ...) 61 | `b2share` | using o2r schema for the o2r community depositions on [b2share](https://b2share.eudat.eu/) | `json` | ... 62 | `codemeta` | [codemeta 2.0-rc](https://github.com/codemeta/codemeta/tree/2.0-rc) | `json ld` | ... 63 | `zenodo` | [Deposition metadata](https://zenodo.org/dev#collapse-list16) | `json` | for storing full ERC in the Zenodo data repository; Zenodo also publishes metadata on [DataCite](https://datacite.org/) 64 | -------------------------------------------------------------------------------- /docs/user-scenarios.md: -------------------------------------------------------------------------------- 1 | # User scenarios 2 | 3 | ## Andrea the author and reader 4 | 5 | Andrea turned 29 this year. 6 | She is always up for a joke and a pot of coffee but is also quite impatient. 
7 | Especially if she has to wait for others or if she hasn’t had any progress for a while. 8 | However, currently Andrea does her Ph.D in the field of geosciences. 9 | Two years ago she decided to go for a cumulative dissertation, meaning she publishes scientific papers throughout her graduation and summarizes them at the end. 10 | She already published her first paper a few months ago which is good, actually. 11 | One of the reviewers was interested in the data and the source-code in order to reproduce the results. 12 | 13 | After a few hours of searching (remember she is not one of the most patient), she finally finds some files which include the dataset and also the source-code in R (a statistics program). 14 | Just a short try if it is still working... 15 | weird, the results are different. 16 | Just a short look into the paper... 17 | The configuration is different than the one described in the method section. 18 | Well, just a few manipulations and - still not working. 19 | 20 | Although she submitted the paper just a few months ago, she can’t remember the exact configuration for the results in the paper. 21 | Fortunately, submitting data and code was not mandatory. 22 | But Andrea knows she made a mistake. 23 | 24 | More and more journals expect their authors to submit data and source-code which underlie the research findings. 25 | For this reason, she wants to change her working behavior and to keep data and code files better under control. 26 | She remembers her last research work which was quite unstructured, maybe already messy. 27 | Code and data were distributed over several folders and even computers. 28 | She had to search for them for quite a while. 29 | Moreover, some components do not work anymore. 30 | 31 | This time, she wants to do it better and searches for a great tool assisting her workflow. 32 | She just heard about a new website supporting reproducible research.
33 | It allows to upload all necessary files and to create a so-called "container" which is "executable" - whatever that means. 34 | It even verifies the results in the paper making it possible to detect errors immediately. 35 | Of course it also contains common features like sharing the publication with other authors. 36 | On top of that, Andrea can also benefit from other publications. 37 | As the website automatically generates a number of meta information, new search capabilities arise. 38 | It is not only possible to search for other publications by using keywords, but also by using spatial and temporal properties and constraints. 39 | It is even possible to constrain the search to hypotheses and research questions having certain vocabulary, thus simplifying search for related work. 40 | Andrea is quite impressed! She easily finds related papers around her own work. 41 | She gets a good overview about existing research questions making it easier to identify a research gap she can focus on. 42 | Andrea doesn’t even have to implement all the code lines for her statistical analysis from scratch, but can build upon existing. 43 | While reading some of the related papers in the browser, she realizes a couple of user interface widgets besides the incorporated figures. 44 | She doesn’t know them from traditional, static papers which are typically published as .pdf-files. 45 | Andrea recognizes that the widgets allow to interact with the diagrams to which the widgets belong. 46 | They allow to change, for example, thresholds, input variables and constants. 47 | She is thus able to check the assumptions and conclusions underlying the paper. 48 | She is a bit overwhelmed by the number of new features, such as exchanging the dataset or the source-code underlying the paper. 49 | Andrea is quite happy about her new tool.
50 | It provides support for structured work, finding related publications, algorithms and datasets, identifying a research gap, and even tools for interacting with traditional, static papers. 51 | So, let’s go for the second paper. 52 | 53 | ## Arthur the administrator 54 | 55 | Arthur works as a system administrator in a large university library in Germany. 56 | He's quite happy with his job. 57 | After working as freelance software developer for over 20 years, he now enjoys the challenge to make all the different servers and applications under his care work like a charm 24/7 while having a stable paycheck and reasonable working hours. 58 | He is particularly proud that, since he took over the job, he successfully migrated all services to a private cloud infrastructure and enabled https-only traffic on all, even the internal, APIs and websites. 59 | Since then, there has been minimal overtime for him and close to 0 minutes downtime for the services... 60 | and a raise! 61 | 62 | Arthur is interested in this new reproducibility service which the head of the library is interested in, but he is sceptical about all new systems. 63 | There are going to be bugs, unforeseen problems, and a lot of testing "in production", which he does not like. 64 | But he knows scientists have been in touch with the library before about archiving data and software, so if this is a high priority for his customers, as he sees them, there is no way around it. 65 | 66 | At second look though, he realizes the project seems to have all the basics straight for a stable and scalable deployment: All components are published under open source licenses, and the project maintainers provide different ready-to-use Docker images. 67 | Arthur worries about security, so there is no better way to make sure things work well than source code access.
68 | The project is written in a language he has not used before himself, but he can actually build the project himself from source with the provided instructions. 69 | He also understands that, instead of reinventing the wheel, the developers seem to be competent enough to build upon established libraries. 70 | 71 | The Dockerfiles are great to play around with, but also easy to integrate in his own server management solution. 72 | He also likes the HTTP APIs and the setup and configuration, which seem to be very well documented. 73 | This should make it easy to integrate the new solution with some custom tools he developed, but also with some legacy infrastructure he has not yet been able to get rid of. 74 | He does worry a bit about the scheduling solution, since he is not very keen on Docker containers being started automatically on his servers. 75 | Good thing the project contributors seem to operate a public chat, and professional support is also available at reasonable prices. 76 | 77 | After some testing, he feels good to tell his colleagues: looks good to me, let's try this out! 78 | 79 | ## Olivia the operator 80 | 81 | Olivia is the chancellor of a mid-size state owned university in the US. 82 | She is proud to have been elected to this position a few years ago, and works very hard each day to improve both the university's reputation and the working and learning conditions of her employees and students. 83 | She had to make some unpleasant first hand experiences with aspects of today's academic life, some of which sadly became almost normal: budget cuts, violence on campus, and plagiarism scandals. 84 | During all of these upsets, she is happy she never wavered on the importance of personal integrity and credibility of each and every one of the scientists and researchers working on her campus. 85 | 86 | To gain some ground in the competition with other universities, Olivia puts her best assistant on the job of finding the newest trends in academia.
87 | Soon enough she presents to her the idea of making all research conducted at the university reproducible. 88 | Olivia is first surprised by the fact, being an arts major herself, as she thought that is already the case. 89 | She starts reading the material provided to her and realizes science, and especially something called computational science, is very much different from the practical work she has encountered during her years as a researcher. 90 | It also becomes clear it won't work to just put out a statement forcing every lab to spend enormous efforts on changing established research practices, or to re-do what has been done 5, 10 or 20 years ago. 91 | The huge variety of labs and workflows and all the different kinds of people... 92 | getting out the stick simply won't work. 93 | But maybe the carrot will? 94 | 95 | She discovers a novel website. 96 | It promises to solve all the problems of reproducibility. 97 | The people behind it seem competent enough to her, but again she asks her assistant to consult with experts from the university library and computer science departments to see what they think. 98 | A lengthy discussion starts, and there seems to be no consensus after months of meetings and evaluations. 99 | The assistant doesn't know what to report back to Olivia. 100 | Eventually, Olivia is tired of waiting and joins a few of the meetings of the expert group as an observer. 101 | She realizes nothing comes for free... 102 | she encourages the expert group to create a list of requirements on establishing a reproducibility website for the university. 103 | She quickly understands they might get the proper time and money to do it, because the lecturers and staff in the group realize they won't just get more work to do! 104 | 105 | Olivia makes the new website a matter for the boss. 106 | She successfully acquires the funds to start and maintain both the technical services and to hire support staff to maintain it.
107 | Beyond that, the support staff is even equipped to provide consultancy services to all researchers at the university. 108 | These services quickly become popular across all disciplines working with data and code, and after just a few months, more and more fully reproducible papers appear on the public reproducibility website. 109 | Olivia is very glad to see the changes she introduced did not have an impact on the scientific output of the university - the monthly statistics tell her that much. 110 | Is the quality or quantity of the output going to increase? It's too soon to tell, but Olivia is sure it will. 111 | Just last week, the head of the programme reported to her that now ten papers are available on the website for which researchers from different university departments collaborated, who never collaborated before - they discovered the overlap through the new system! More than 20 undergraduate courses teaching scientific methods incorporated material from the website into their course schedule, and 50% of the graduate theses from the computer science department are now using the university reproducibility tools. 112 | Those are good enough signs for Olivia. 113 | She decides to pitch an idea to the university board: let's include reproducibility of publications as an evaluation factor for the budget allocations next year. 114 | You got to use the stick from time to time to make people appreciate the carrot. 115 | 116 | ## Carl the curator 117 | 118 | Carl works as a digital assets curator at a university library in Germany. 119 | He has been working as a librarian for about ten years and experienced the digital transformation of the field, which is why he specialized in the area of digital curation and archiving.
120 | He is qualified to manage and organize several collections of digital objects at a given time and recently selected objects for an exhibition of gold standard open access publications in the software category of his institution’s catalog front page. 121 | 122 | Carl’s expertise encompasses the management of accessibility levels as well as the preservation of file integrity and meta data curation. 123 | Since he discovered a growing interest in the preservation of software, he realized reproducibility of research findings, including code and data, increases the value and visibility of his university’s portfolio. 124 | As a result, Carl is working closely with the library’s team for Research Data Management, in order to facilitate integration of reproducible computational environments into the digital objects' life cycle. 125 | This work matches their current policies. 126 | 127 | As he strongly believes publicly funded research data are public goods, Carl values his profession as a vital point of intersection between researchers, librarians and the general public. 128 | Therefore, when planning a selection of digital assets or curating the library’s catalogs, Carl enjoys the interoperability provided by international metadata standards and linked open data vocabularies. 129 | 130 | ## Polly the publisher 131 | 132 | Polly is the head of a large publishing firm for scientific journals. 133 | She grew up being part of a publisher family, the third of four kids. 134 | While her older brothers wrestled with the family legacy, she has always been close to her late grandfather, who started the publishing business as a young man. 135 | So it came as no surprise she studied arts and library science and after a few well planned career steps around the globe, she joined the family business as assistant of her father and became CEO after a few years, a decision she rarely regrets.
136 | 137 | Though there is one thing making her job challenging every day: technology's high development speed. 138 | For a large publishing business, it is hard to keep up with new and modern technology. 139 | She has to serve both old (in more than one way) customers and employers, who have had a long relationship and a work environment and processes which have developed and settled in over many years. 140 | On the other hand, she sees new ideas by entrepreneurs and startups almost every week, some crazy and some rightfully called revolutionary, who experiment with new ways to publish science without the baggage of a reputation and hundreds of journals and an order of magnitude more employees. 141 | 142 | So what should Polly do? Scramble up some money to acquire a few startups and replace the existing review and authoring solution? Fire all staff members who are too slow adopting the new technologies? Close journals with an excellent reputation because editors and reviewers are not tech-savvy? 143 | 144 | Obviously, none of these were an option. 145 | Change had to come gradually and inclusively, not in a disruptive fashion. 146 | Polly turned to her CTO Charlotte. 147 | She joined the company recently and played out to be a very good hire, as she was able to revive the in-house development team with a positive attitude and a few key hires. 148 | Charlotte is aware of the challenges and agrees to compile an action plan from her perspective. 149 | A few weeks later, she presents the options to Polly and the other board members. 150 | She suggests to adopt an open service for interactive publications, which is an integrated solution for hosting and archiving data as well as code, all of which are often part of publications these days. 151 | It is open source, but of course it does not come for free. 152 | Charlotte suggests a combined approach of experiments by her own staff and external consulting by the original developers of the software. 
153 | And she quickly mitigates all concerns raised by the other CxOs: the website is customizable, so it will not look like the competitors' versions, it is extensible, so their few "cool features" which have been developed over the last years will be easy to integrate, and it is compatible with the existing data repository (so no need to replace that beast of a software). 154 | This new website would be an option presented to all editors to adopt for their journals. 155 | Education of the company's staff would precede this offer to make sure the intended message is spread: don't be left behind, challenge reviewers and authors to improve the quality of the journals and subsequently raise the bar for high quality open science. 156 | 157 | ## Richard the reviewer 158 | 159 | Richard is a successful researcher. 160 | After getting tenure a few years back, he embraces the chance to support students and collaborate with other scientists instead of hunting for the next easy publication to get his name on. 161 | A big part of his time is taken up by his membership in the editorial boards of two journals and his engagement with several more journals as a reviewer. 162 | 163 | Richard is "senior" in some ways, and he as well as his colleagues know his value lies in experience, not in hunting the latest hot new things. 164 | Therefore Richard never came around to catch up practically with the latest technologies, and while he has a good understanding of computer science and used to be a very capable programmer, this new stuff the kids are doing is beyond his means. 165 | 166 | As the next paper review request lands in his inbox, he skims the abstract and soon thinks "I will never be able to thoroughly evaluate this work, the code must be too complex to run on my machine". 167 | But the content is so interesting! What a shame. 168 | He almost replies with a negative answer and then sees a new link at the bottom of the notification.
169 | The publisher must have added a new feature. 170 | The link's title is "Click here to examine and manipulate code and data". 171 | 172 | Richard clicks the link. 173 | He is taken to a website looking partially similar to the old review system he is used to. 174 | On the one side there is the well-known article view where he can read, add highlights and make comments. 175 | But on the other side, there is a new menu he enthusiastically explores. 176 | It allows him to edit parameters and re-run analysis of the paper! 177 | Without even downloading any data or code. 178 | He immediately sees the benefits: What a relief for his work, and what a chance to dig deeper into the article and conduct a thorough review. 179 | 180 | After some brief inspections of the article figures and manipulation of some parameters, Richard feels confident he can actually do the review properly. 181 | He lets the editor know about his decision and wants to dive right back into the article, but then stops himself. 182 | First, he writes an email to his fellow editors about this new review system for evaluating code and data - they need it for their journal, too. 183 | 184 | ## Rachel the reader 185 | 186 | Rachel is a second year graduate student in geoinformatics. 187 | She's eager to learn and has left all struggles with the technical side of research behind, and has become a trusted programmer in her group and is seen as an expert in more than one programming language. 188 | 189 | When she starts one of her final courses in advanced geoinformatics, the lecturer sends out a long list of reading material. 190 | _How is she supposed to get through all of it?_ 191 | Never faltering, she starts reading all the documents... 192 | 193 | After the third article, she is annoyed and underwhelmed by the fancy descriptions and high-level diagrams. 194 | Although they all make sense, she feels like there is more to see and understand than is presented in the article.
195 | 196 | She shares her thoughts with her teacher Teresa during the next seminar. 197 | Teresa can relate to Rachel's frustration and quickly points her to items 8 and 9 on the reading list. 198 | "These are different", she says. 199 | 200 | Rachel gets back to reading. 201 | The next articles start out the same as the others, but she soon realizes something is different. 202 | The website takes a bit longer to load, and the graphics do not seem like they are compressed images at all. 203 | She needs some time to explore the relatively complex navigation, but then is excited to discover she can read and even download all the code and data which was used to generate the figures. 204 | Even more, she can interact with the present methods and play around with the algorithms. 205 | Finally she can immediately test her own understanding, challenge her criticism, and resolve misunderstandings. 206 | 207 | She plays around with the articles on the website for a little while and spends a lot longer on trying to understand the bits and pieces. 208 | Eventually she sees a close relation of one aspect of the analysis with the research project she thought about doing for her thesis. 209 | Rachel is enthusiastic and directly downloads the whole article with its code to her own laptop to try the code out with her own dataset. 210 | -------------------------------------------------------------------------------- /docs/zenodo.md: -------------------------------------------------------------------------------- 1 | # Zenodo integration 2 | 3 | Zenodo, and other data repositories, provide a way to create a collection of records. 4 | At Zenodo these are called _communities_. 5 | In the context of o2r, we have created a community [`o2r`](https://zenodo.org/communities/o2r) (also in the [sandbox](https://zenodo.org/communities/o2r)).
6 | 7 | **Issues** 8 | 9 | - There is no semi-automated workflow for adding a Zenodo record to a community 10 | - A curation policy should guide the community management 11 | 12 | ## Workflow 13 | 14 | - communities metadata element in [Zenodo metadata](http://developers.zenodo.org/#representation) can be used to trigger the workflow only: 15 | > _List of communities you wish the deposition to appear. The owner of the community will be notified, and can either accept or reject your request. Each array element is an object with the attributes:_ 16 | - someone needs to curate and "accept" / "approve" ERCs as Zenodo records upon submission, there is no API function for that. 17 | - [UI integration](https://github.com/o2r-project/o2r-platform/issues/180) 18 | 19 | ## Curation policy 20 | 21 | We need to write a policy for that community where all ERC are collected by default when shipped to Zenodo. 22 | They must be in line with http://about.zenodo.org/policies/ and http://about.zenodo.org/terms/. 23 | 24 | **Content for the policy** 25 | 26 | - the records must be a valid ERC 27 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: o2r Architecture 2 | site_description: Opening Reproducible Research System Architecture 3 | site_author: "Daniel Nüst, Matthias Schutzeichel, Jörg Lorenz" 4 | copyright: "Licensed under Creative Commons CC0 1.0 Universal License." 
5 | nav: 6 | - Architecture: index.md 7 | - 'User scenarios': user-scenarios.md 8 | - Glossary: glossary.md 9 | - Metadata: metadata.md 10 | - Zenodo integration: zenodo.md 11 | theme: 12 | name: readthedocs 13 | custom_dir: custom_theme 14 | site_author: o2r project 15 | site_dir: site 16 | site_url: https://o2r.info/architecture/ 17 | extra_css: [arc42.css] 18 | repo_url: https://github.com/o2r-project/architecture 19 | repo_name: GitHub 20 | edit_uri: tree/master/docs/ 21 | markdown_extensions: 22 | - markdown_include.include 23 | - markdown.extensions.admonition 24 | - def_list 25 | - toc: 26 | permalink: true 27 | --------------------------------------------------------------------------------