├── .coveragerc
├── .github
    └── workflows
    │   ├── build-docs.yml
    │   ├── publish-pypi.yml
    │   └── run-tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── AUTHORS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── README.md
├── docs
    ├── Makefile
    ├── _static
    │   └── .gitignore
    ├── authors.md
    ├── changelog.md
    ├── conf.py
    ├── contributing.md
    ├── index.md
    ├── license.md
    ├── readme.md
    ├── requirements.txt
    └── tutorial.md
├── lib
    ├── CMakeLists.txt
    └── src
    │   └── rdswrapper.cpp
├── pyproject.toml
├── setup.cfg
├── setup.py
├── src
    └── rds2py
    │   ├── PyRdsReader.py
    │   ├── __init__.py
    │   ├── generics.py
    │   ├── rdsutils.py
    │   ├── read_atomic.py
    │   ├── read_delayed_matrix.py
    │   ├── read_dict.py
    │   ├── read_factor.py
    │   ├── read_frame.py
    │   ├── read_granges.py
    │   ├── read_mae.py
    │   ├── read_matrix.py
    │   ├── read_rle.py
    │   ├── read_sce.py
    │   └── read_se.py
├── tests
    ├── conftest.py
    ├── data
    │   ├── atomic_attr.rds
    │   ├── atomic_chars.rds
    │   ├── atomic_chars_unicode.rds
    │   ├── atomic_complex.rds
    │   ├── atomic_double.rds
    │   ├── atomic_ints.rds
    │   ├── atomic_ints_with_names.rds
    │   ├── atomic_logical.rds
    │   ├── atomic_logical_wNA.rds
    │   ├── atomic_raw.rds
    │   ├── data.frame.rds
    │   ├── example_anndata.h5ad
    │   ├── generate_files.R
    │   ├── granges.rds
    │   ├── grangeslist.rds
    │   ├── h5sparse.rds
    │   ├── lists.rds
    │   ├── lists_df.rds
    │   ├── lists_df_rownames.rds
    │   ├── lists_nested.rds
    │   ├── lists_nested_deep.rds
    │   ├── matrix_with_dim_names.rds
    │   ├── matrix_with_row_names.rds
    │   ├── numpy_dtype.rds
    │   ├── ranged_se.rds
    │   ├── s4_class.rds
    │   ├── s4_dense_matrix.rds
    │   ├── s4_matrix.rds
    │   ├── s4_matrix_dgt.rds
    │   ├── scalar_int.rds
    │   ├── simple_factors.rds
    │   ├── simple_list.rds
    │   ├── simple_mae.rds
    │   ├── simple_rle.rds
    │   ├── simple_sce.rds
    │   └── sumexpt.rds
    ├── test_atomics.py
    ├── test_delayedmatrices.py
    ├── test_dict.py
    ├── test_factors.py
    ├── test_frames.py
    ├── test_granges.py
    ├── test_mae.py
    ├── test_matrices.py
    ├── test_rle.py
    ├── test_s4.py
    ├── test_sce.py
    └── test_se.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
 1 | # .coveragerc to control coverage.py
 2 | [run]
 3 | branch = True
 4 | source = rds2py
 5 | # omit = bad_file.py
 6 | 
 7 | [paths]
 8 | source =
 9 |     src/
10 |     */site-packages/
11 | 
12 | [report]
13 | # Regexes for lines to exclude from consideration
14 | exclude_lines =
15 |     # Have to re-enable the standard pragma
16 |     pragma: no cover
17 | 
18 |     # Don't complain about missing debug-only code:
19 |     def __repr__
20 |     if self\.debug
21 | 
22 |     # Don't complain if tests don't hit defensive assertion code:
23 |     raise AssertionError
24 |     raise NotImplementedError
25 | 
26 |     # Don't complain if non-runnable code isn't run:
27 |     if 0:
28 |     if __name__ == .__main__.:
29 | 


--------------------------------------------------------------------------------
/.github/workflows/build-docs.yml:
--------------------------------------------------------------------------------
 1 | name: Build documentation
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "*"
 7 | 
 8 | jobs:
 9 |   test:
10 |     name: Build docs
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - uses: actions/checkout@v4
15 | 
16 |       - name: Set up Python 3.12
17 |         uses: actions/setup-python@v5
18 |         with:
19 |           python-version: 3.12
20 |           cache: 'pip'
21 | 
22 |       - name: Install Python dependencies
23 |         run: |
24 |           python -m pip install --upgrade pip setuptools
25 |           pip install cmake pybind11 numpy tox
26 | 
27 |       - name: Build docs
28 |         run: |
29 |           python setup.py build_ext --inplace
30 |           cp build/lib*/rds2py/lib_rds_parser* src/rds2py/
31 |           tox -e docs
32 |           touch ./docs/_build/html/.nojekyll
33 | 
34 |       - name: GH Pages Deployment
35 |         if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/')
36 |         uses: JamesIves/github-pages-deploy-action@v4
37 |         with:
38 |           branch: gh-pages # The branch the action should deploy to.
39 |           folder: ./docs/_build/html
40 |           clean: true # Automatically remove deleted files from the deploy branch
41 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "*"
 7 | 
 8 | jobs:
 9 |   build_wheels:
10 |     name: Build wheels on ${{ matrix.os }}
11 | 
12 |     runs-on: ${{ matrix.os }}
13 | 
14 |     strategy:
15 |       matrix:
16 |         # macos-13 is an intel runner, higher macos's are apple silicon
17 |         # At some point, maybe get this to work on windows-latest
18 |         os: [ubuntu-latest, macos-13, macos-latest]
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v4
22 |         with:
23 |           submodules: true
24 | 
25 |       - name: Build wheels
26 |         uses: pypa/cibuildwheel@v2.22.0
27 |         env:
28 |           CIBW_ARCHS_LINUX: x86_64 # remove this later so we build for all linux archs
29 |           CIBW_PROJECT_REQUIRES_PYTHON: ">=3.9"
30 |           CIBW_SKIP: pp*
31 | 
32 |       - uses: actions/upload-artifact@v4
33 |         with:
34 |           name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
35 |           path: ./wheelhouse/*.whl
36 | 
37 |   build_sdist:
38 |     name: Build source distribution
39 |     runs-on: ubuntu-latest
40 |     steps:
41 |       - uses: actions/checkout@v4
42 | 
43 |       - name: Build sdist
44 |         run: pipx run build --sdist
45 | 
46 |       - uses: actions/upload-artifact@v4
47 |         with:
48 |           name: cibw-sdist
49 |           path: dist/*.tar.gz
50 | 
51 |   upload_pypi:
52 |     needs: [build_wheels, build_sdist]
53 |     runs-on: ubuntu-latest
54 |     permissions:
55 |       id-token: write
56 |       repository-projects: write
57 |       contents: write
58 |       pages: write
59 | 
60 |     steps:
61 |       - uses: actions/download-artifact@v4
62 |         with:
63 |           pattern: cibw-*
64 |           path: dist
65 |           merge-multiple: true
66 | 
67 |       # This uses the trusted publisher workflow so no token is required.
68 |       - name: Publish to PyPI
69 |         uses: pypa/gh-action-pypi-publish@release/v1
70 | 


--------------------------------------------------------------------------------
/.github/workflows/run-tests.yml:
--------------------------------------------------------------------------------
 1 | name: Test the library
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
15 | 
16 |     name: Python ${{ matrix.python-version }}
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |         with:
20 |           submodules: true
21 | 
22 |       - name: Setup Python
23 |         uses: actions/setup-python@v5
24 |         with:
25 |           python-version: ${{ matrix.python-version }}
26 |           cache: 'pip'
27 | 
28 |       - name: Get latest CMake
29 |         uses: lukka/get-cmake@latest
30 | 
31 |       - name: Test with tox
32 |         run: |
33 |           pip install tox
34 |           tox
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Temporary and binary files
 2 | *~
 3 | *.py[cod]
 4 | *.so
 5 | *.cfg
 6 | !.isort.cfg
 7 | !setup.cfg
 8 | *.orig
 9 | *.log
10 | *.pot
11 | __pycache__/*
12 | .cache/*
13 | .*.swp
14 | */.ipynb_checkpoints/*
15 | .DS_Store
16 | 
17 | # Project files
18 | .ropeproject
19 | .project
20 | .pydevproject
21 | .settings
22 | .idea
23 | .vscode
24 | tags
25 | 
26 | # Package files
27 | *.egg
28 | *.eggs/
29 | .installed.cfg
30 | *.egg-info
31 | 
32 | # Unittest and coverage
33 | htmlcov/*
34 | .coverage
35 | .coverage.*
36 | .tox
37 | junit*.xml
38 | coverage.xml
39 | .pytest_cache/
40 | 
41 | # Build and docs folder/files
42 | build/*
43 | dist/*
44 | sdist/*
45 | docs/api/*
46 | docs/_rst/*
47 | docs/_build/*
48 | cover/*
49 | MANIFEST
50 | 
51 | # Per-project virtualenvs
52 | .venv*/
53 | .conda*/
54 | .python-version
55 | 
56 | extern/rds2cpp*
57 | src/rds2py/lib/parser.cpp
58 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | exclude: '^docs/conf.py'
 2 | 
 3 | repos:
 4 | - repo: https://github.com/pre-commit/pre-commit-hooks
 5 |   rev: v5.0.0
 6 |   hooks:
 7 |   - id: trailing-whitespace
 8 |   - id: check-added-large-files
 9 |   - id: check-ast
10 |   - id: check-json
11 |   - id: check-merge-conflict
12 |   - id: check-xml
13 |   - id: check-yaml
14 |   - id: debug-statements
15 |   - id: end-of-file-fixer
16 |   - id: requirements-txt-fixer
17 |   - id: mixed-line-ending
18 |     args: ['--fix=auto']  # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows
19 | 
20 | # - repo: https://github.com/PyCQA/docformatter
21 | #   rev: "v1.7.5"
22 | #   hooks:
23 | #     - id: docformatter
24 | #       additional_dependencies: [tomli]
25 | #       args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120]
26 | #       # --config, ./pyproject.toml
27 | 
28 | # - repo: https://github.com/psf/black
29 | #   rev: 24.8.0
30 | #   hooks:
31 | #   - id: black
32 | #     language_version: python3
33 | 
34 | - repo: https://github.com/astral-sh/ruff-pre-commit
35 |   # Ruff version.
36 |   rev: v0.11.10
37 |   hooks:
38 |     - id: ruff
39 |       args: [--fix, --exit-non-zero-on-fix]
40 |     # Run the formatter.
41 |     - id: ruff-format
42 | 
43 | ## If you would like to embrace black styles even in the docs:
44 | # - repo: https://github.com/asottile/blacken-docs
45 | #   rev: v1.13.0
46 | #   hooks:
47 | #   - id: blacken-docs
48 | #     additional_dependencies: [black]
49 | 
50 | ## Check for misspells in documentation files:
51 | # - repo: https://github.com/codespell-project/codespell
52 | #   rev: v2.2.5
53 | #   hooks:
54 | #   - id: codespell
55 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Build documentation in the docs/ directory with Sphinx
 8 | sphinx:
 9 |   configuration: docs/conf.py
10 | 
11 | # Build documentation with MkDocs
12 | #mkdocs:
13 | #  configuration: mkdocs.yml
14 | 
15 | # Optionally build your docs in additional formats such as PDF
16 | formats:
17 |   - pdf
18 | 
19 | python:
20 |   version: 3.8
21 |   install:
22 |     - requirements: docs/requirements.txt
23 |     - {path: ., method: pip}
24 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | # Contributors
2 | 
3 | * Jayaram Kancherla [jayaram.kancherla@gmail.com](mailto:jayaram.kancherla@gmail.com)
4 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## Version 0.7.0 - 0.7.3
 4 | 
 5 | - All dependencies are now listed under optional, except for numpy and biocutils.
 6 | - Pin the version of byteme.
 7 | - Fix an issue when trying to access shape of the `MatrixWrapper` objects.
 8 | - Fix bug sanitizing empty data frame like objects.
 9 | 
10 | ## Version 0.6.1
11 | 
12 | - Fix name of the attribute that contains names of dimensions in matrices.
13 | - Update relevant tests and generate new rds files to test matrix behavior.
14 | 
15 | ## Version 0.6.0
16 | 
17 | - chore: Remove Python 3.8 (EOL).
18 | - precommit: Replace docformatter with ruff's formatter.
19 | 
20 | ## Version 0.5.1
21 | 
22 | - Added parser for delayed sparse objects backed by H5
23 | 
24 | ## Version 0.5.0
25 | 
26 | - Complete overhaul of the codebase using pybind11
27 | - Streamlined readers for R data types
28 | - Updated API for all classes and methods
29 | - Updated documentation and tests.
30 | 
31 | ## Version 0.4.5
32 | 
33 | - Switch to pybind11 for implementing the bindings to rds2cpp.
34 | - Update tests, documentation and actions.
35 | - Fix github issue with showing incorrect package version on github pages.
36 | 
37 | ## Version 0.4.4
38 | 
39 | - Add methods to parse RDS files containing `GenomicRangesList`
40 | - Fix bug in reading strand information; mostly RLE vectors.
41 | - Update tests and documentation
42 | 
43 | ## Version 0.4.0 - 0.4.3
44 | 
45 | - Migrate to the new class implementations
46 | - Add reader for objects containing genomic ranges
47 | 
48 | ## Version 0.3.0
49 | 
50 | This release migrates the package to a more palatable Google's Python style guide. A major modification to the package is with casing, all `camelCase` properties, methods, functions and parameters are now `snake_case`.
51 | 
52 | In addition, docstrings and documentation have been updated to use sphinx's features of linking objects to their types. Sphinx now also documents private and special dunder methods (e.g. `__getitem__`, `__copy__` etc). Intersphinx has been updated to link to references from dependent packages.
53 | 
54 | Configuration for flake8, ruff and black has been added to pyproject.toml and setup.cfg to be less annoying.
55 | 
56 | Finally, pyscaffold has been updated to use "myst-parser" as the markdown compiler instead of recommonmark. As part of the pyscaffold setup, one may use pre-commits to run some of the routine tasks of linting and formatting before every commit. While this is sometimes annoying and can be ignored with `--no-verify`, it brings some consistency to the code base.
57 | 
58 | ## Version 0.1
59 | 
60 | - First implementation
61 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | ```{todo} THIS IS SUPPOSED TO BE AN EXAMPLE. MODIFY IT ACCORDING TO YOUR NEEDS!
  2 | 
  3 |    The document assumes you are using a source repository service that promotes a
  4 |    contribution model similar to [GitHub's fork and pull request workflow].
  5 |    While this is true for the majority of services (like GitHub, GitLab,
  6 |    BitBucket), it might not be the case for private repositories (e.g., when
  7 |    using Gerrit).
  8 | 
  9 |    Also notice that the code examples might refer to GitHub URLs or the text
 10 |    might use GitHub specific terminology (e.g., *Pull Request* instead of *Merge
 11 |    Request*).
 12 | 
 13 |    Please make sure to check the document having these assumptions in mind
 14 |    and update things accordingly.
 15 | ```
 16 | 
 17 | ```{todo} Provide the correct links/replacements at the bottom of the document.
 18 | ```
 19 | 
 20 | ```{todo} You might want to have a look on [PyScaffold's contributor's guide],
 21 | 
 22 |    especially if your project is open source. The text should be very similar to
 23 |    this template, but there are a few extra contents that you might decide to
 24 |    also include, like mentioning labels of your issue tracker or automated
 25 |    releases.
 26 | ```
 27 | 
 28 | # Contributing
 29 | 
 30 | Welcome to `rds2py` contributor's guide.
 31 | 
 32 | This document focuses on getting any potential contributor familiarized with
 33 | the development processes, but [other kinds of contributions] are also appreciated.
 34 | 
 35 | If you are new to using [git] or have never collaborated in a project previously,
 36 | please have a look at [contribution-guide.org]. Other resources are also
 37 | listed in the excellent [guide created by FreeCodeCamp] [^contrib1].
 38 | 
 39 | Please notice, all users and contributors are expected to be **open,
 40 | considerate, reasonable, and respectful**. When in doubt,
 41 | [Python Software Foundation's Code of Conduct] is a good reference in terms of
 42 | behavior guidelines.
 43 | 
 44 | ## Issue Reports
 45 | 
 46 | If you experience bugs or general issues with `rds2py`, please have a look
 47 | at the [issue tracker].
 48 | If you don't see anything useful there, please feel free to fire an issue report.
 49 | 
 50 | :::{tip}
 51 | Please don't forget to include the closed issues in your search.
 52 | Sometimes a solution was already reported, and the problem is considered
 53 | **solved**.
 54 | :::
 55 | 
 56 | New issue reports should include information about your programming environment
 57 | (e.g., operating system, Python version) and steps to reproduce the problem.
 58 | Please try also to simplify the reproduction steps to a very minimal example
 59 | that still illustrates the problem you are facing. By removing other factors,
 60 | you help us to identify the root cause of the issue.
 61 | 
 62 | ## Documentation Improvements
 63 | 
 64 | You can help improve `rds2py` docs by making them more readable and coherent, or
 65 | by adding missing information and correcting mistakes.
 66 | 
 67 | `rds2py` documentation uses [Sphinx] as its main documentation compiler.
 68 | This means that the docs are kept in the same repository as the project code, and
 69 | that any documentation update is done in the same way as a code contribution.
 70 | 
 71 | ```{todo} Don't forget to mention which markup language you are using.
 72 | 
 73 |     e.g.,  [reStructuredText] or [CommonMark] with [MyST] extensions.
 74 | ```
 75 | 
 76 | ```{todo} If your project is hosted on GitHub, you can also mention the following tip:
 77 | 
 78 |    :::{tip}
 79 |       Please notice that the [GitHub web interface] provides a quick way to
 80 |       propose changes in `rds2py`'s files. While this mechanism can
 81 |       be tricky for normal code contributions, it works perfectly fine for
 82 |       contributing to the docs, and can be quite handy.
 83 | 
 84 |       If you are interested in trying this method out, please navigate to
 85 |       the `docs` folder in the source [repository], find which file you
 86 |       would like to propose changes and click on the little pencil icon at the
 87 |       top, to open [GitHub's code editor]. Once you finish editing the file,
 88 |       please write a message in the form at the bottom of the page describing
 89 |       which changes you have made and what the motivations behind them are, and
 90 |       submit your proposal.
 91 |    :::
 92 | ```
 93 | 
 94 | When working on documentation changes in your local machine, you can
 95 | compile them using [tox] :
 96 | 
 97 | ```
 98 | tox -e docs
 99 | ```
100 | 
101 | and use Python's built-in web server for a preview in your web browser
102 | (`http://localhost:8000`):
103 | 
104 | ```
105 | python3 -m http.server --directory 'docs/_build/html'
106 | ```
107 | 
108 | ## Code Contributions
109 | 
110 | ```{todo} Please include a reference or explanation about the internals of the project.
111 | 
112 |    An architecture description, design principles or at least a summary of the
113 |    main concepts will make it easy for potential contributors to get started
114 |    quickly.
115 | ```
116 | 
117 | ### Submit an issue
118 | 
119 | Before you work on any non-trivial code contribution it's best to first create
120 | a report in the [issue tracker] to start a discussion on the subject.
121 | This often provides additional considerations and avoids unnecessary work.
122 | 
123 | ### Create an environment
124 | 
125 | Before you start coding, we recommend creating an isolated [virtual environment]
126 | to avoid any problems with your installed Python packages.
127 | This can easily be done via either [virtualenv]:
128 | 
129 | ```
130 | virtualenv <PATH TO VENV>
131 | source <PATH TO VENV>/bin/activate
132 | ```
133 | 
134 | or [Miniconda]:
135 | 
136 | ```
137 | conda create -n rds2py python=3 six virtualenv pytest pytest-cov
138 | conda activate rds2py
139 | ```
140 | 
141 | ### Clone the repository
142 | 
143 | 1. Create a user account on GitHub if you do not already have one.
144 | 
145 | 2. Fork the project [repository]: click on the *Fork* button near the top of the
146 |    page. This creates a copy of the code under your account on GitHub.
147 | 
148 | 3. Clone this copy to your local disk:
149 | 
150 |    ```
151 |    git clone git@github.com:YourLogin/rds2py.git
152 |    cd rds2py
153 |    ```
154 | 
155 | 4. You should run:
156 | 
157 |    ```
158 |    pip install -U pip setuptools -e .
159 |    ```
160 | 
161 |    to be able to import the package under development in the Python REPL.
162 | 
163 |    ```{todo} if you are not using pre-commit, please remove the following item:
164 |    ```
165 | 
166 | 5. Install [pre-commit]:
167 | 
168 |    ```
169 |    pip install pre-commit
170 |    pre-commit install
171 |    ```
172 | 
173 |    `rds2py` comes with a lot of hooks configured to automatically help the
174 |    developer to check the code being written.
175 | 
176 | ### Implement your changes
177 | 
178 | 1. Create a branch to hold your changes:
179 | 
180 |    ```
181 |    git checkout -b my-feature
182 |    ```
183 | 
184 |    and start making changes. Never work on the main branch!
185 | 
186 | 2. Start your work on this branch. Don't forget to add [docstrings] to new
187 |    functions, modules and classes, especially if they are part of public APIs.
188 | 
189 | 3. Add yourself to the list of contributors in `AUTHORS.md`.
190 | 
191 | 4. When you’re done editing, do:
192 | 
193 |    ```
194 |    git add <MODIFIED FILES>
195 |    git commit
196 |    ```
197 | 
198 |    to record your changes in [git].
199 | 
200 |    ```{todo} if you are not using pre-commit, please remove the following item:
201 |    ```
202 | 
203 |    Please make sure to see the validation messages from [pre-commit] and fix
204 |    any eventual issues.
205 |    This should automatically use [flake8]/[black] to check/fix the code style
206 |    in a way that is compatible with the project.
207 | 
208 |    :::{important}
209 |    Don't forget to add unit tests and documentation in case your
210 |    contribution adds an additional feature and is not just a bugfix.
211 | 
212 |    Moreover, writing a [descriptive commit message] is highly recommended.
213 |    In case of doubt, you can check the commit history with:
214 | 
215 |    ```
216 |    git log --graph --decorate --pretty=oneline --abbrev-commit --all
217 |    ```
218 | 
219 |    to look for recurring communication patterns.
220 |    :::
221 | 
222 | 5. Please check that your changes don't break any unit tests with:
223 | 
224 |    ```
225 |    tox
226 |    ```
227 | 
228 |    (after having installed [tox] with `pip install tox` or `pipx`).
229 | 
230 |    You can also use [tox] to run several other pre-configured tasks in the
231 |    repository. Try `tox -av` to see a list of the available checks.
232 | 
233 | ### Submit your contribution
234 | 
235 | 1. If everything works fine, push your local branch to the remote server with:
236 | 
237 |    ```
238 |    git push -u origin my-feature
239 |    ```
240 | 
241 | 2. Go to the web page of your fork and click "Create pull request"
242 |    to send your changes for review.
243 | 
244 |    ```{todo} if you are using GitHub, you can uncomment the following paragraph
245 | 
246 |       Find more detailed information in [creating a PR]. You might also want to open
247 |       the PR as a draft first and mark it as ready for review after the feedback
248 |       from the continuous integration (CI) system or any required fixes.
249 | 
250 |    ```
251 | 
252 | ### Troubleshooting
253 | 
254 | The following tips can be used when facing problems to build or test the
255 | package:
256 | 
257 | 1. Make sure to fetch all the tags from the upstream [repository].
258 |    The command `git describe --abbrev=0 --tags` should return the version you
259 |    are expecting. If you are trying to run CI scripts in a fork repository,
260 |    make sure to push all the tags.
261 |    You can also try to remove all the egg files or the complete egg folder, i.e.,
262 |    `.eggs`, as well as the `*.egg-info` folders in the `src` folder or
263 |    potentially in the root of your project.
264 | 
265 | 2. Sometimes [tox] misses out when new dependencies are added, especially to
266 |    `setup.cfg` and `docs/requirements.txt`. If you find any problems with
267 |    missing dependencies when running a command with [tox], try to recreate the
268 |    `tox` environment using the `-r` flag. For example, instead of:
269 | 
270 |    ```
271 |    tox -e docs
272 |    ```
273 | 
274 |    Try running:
275 | 
276 |    ```
277 |    tox -r -e docs
278 |    ```
279 | 
280 | 3. Make sure to have a reliable [tox] installation that uses the correct
281 |    Python version (e.g., 3.9+). When in doubt you can run:
282 | 
283 |    ```
284 |    tox --version
285 |    # OR
286 |    which tox
287 |    ```
288 | 
289 |    If you have trouble and are seeing weird errors upon running [tox], you can
290 |    also try to create a dedicated [virtual environment] with a [tox] binary
291 |    freshly installed. For example:
292 | 
293 |    ```
294 |    virtualenv .venv
295 |    source .venv/bin/activate
296 |    .venv/bin/pip install tox
297 |    .venv/bin/tox -e all
298 |    ```
299 | 
300 | 4. [Pytest can drop you] in an interactive session in the case an error occurs.
301 |    In order to do that you need to pass a `--pdb` option (for example by
302 |    running `tox -- -k <NAME OF THE FAILING TEST> --pdb`).
303 |    You can also setup breakpoints manually instead of using the `--pdb` option.
304 | 
305 | ## Maintainer tasks
306 | 
307 | ### Releases
308 | 
309 | ```{todo} This section assumes you are using PyPI to publicly release your package.
310 | 
311 |    If instead you are using a different/private package index, please update
312 |    the instructions accordingly.
313 | ```
314 | 
315 | If you are part of the group of maintainers and have correct user permissions
316 | on [PyPI], the following steps can be used to release a new version for
317 | `rds2py`:
318 | 
319 | 1. Make sure all unit tests are successful.
320 | 2. Tag the current commit on the main branch with a release tag, e.g., `v1.2.3`.
321 | 3. Push the new tag to the upstream [repository],
322 |    e.g., `git push upstream v1.2.3`
323 | 4. Clean up the `dist` and `build` folders with `tox -e clean`
324 |    (or `rm -rf dist build`)
325 |    to avoid confusion with old builds and Sphinx docs.
326 | 5. Run `tox -e build` and check that the files in `dist` have
327 |    the correct version (no `.dirty` or [git] hash) according to the [git] tag.
328 |    Also check the sizes of the distributions, if they are too big (e.g., >
329 |    500KB), unwanted clutter may have been accidentally included.
330 | 6. Run `tox -e publish -- --repository pypi` and check that everything was
331 |    uploaded to [PyPI] correctly.
332 | 
333 | [^contrib1]: Even though, these resources focus on open source projects and
334 |     communities, the general ideas behind collaborating with other developers
335 |     to collectively create software are general and can be applied to all sorts
336 |     of environments, including private companies and proprietary code bases.
337 | 
338 | 
339 | [black]: https://pypi.org/project/black/
340 | [commonmark]: https://commonmark.org/
341 | [contribution-guide.org]: http://www.contribution-guide.org/
342 | [creating a pr]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request
343 | [descriptive commit message]: https://chris.beams.io/posts/git-commit
344 | [docstrings]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
345 | [first-contributions tutorial]: https://github.com/firstcontributions/first-contributions
346 | [flake8]: https://flake8.pycqa.org/en/stable/
347 | [git]: https://git-scm.com
348 | [github web interface]: https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/editing-files-in-your-repository
349 | [github's code editor]: https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/editing-files-in-your-repository
350 | [github's fork and pull request workflow]: https://guides.github.com/activities/forking/
351 | [guide created by freecodecamp]: https://github.com/freecodecamp/how-to-contribute-to-open-source
352 | [miniconda]: https://docs.conda.io/en/latest/miniconda.html
353 | [myst]: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html
354 | [other kinds of contributions]: https://opensource.guide/how-to-contribute
355 | [pre-commit]: https://pre-commit.com/
356 | [pypi]: https://pypi.org/
357 | [pyscaffold's contributor's guide]: https://pyscaffold.org/en/stable/contributing.html
358 | [pytest can drop you]: https://docs.pytest.org/en/stable/usage.html#dropping-to-pdb-python-debugger-at-the-start-of-a-test
359 | [python software foundation's code of conduct]: https://www.python.org/psf/conduct/
360 | [restructuredtext]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/
361 | [sphinx]: https://www.sphinx-doc.org/en/master/
362 | [tox]: https://tox.readthedocs.io/en/stable/
363 | [virtual environment]: https://realpython.com/python-virtual-environments-a-primer/
364 | [virtualenv]: https://virtualenv.pypa.io/en/stable/
365 | 
366 | 
367 | ```{todo} Please review and change the following definitions:
368 | ```
369 | 
370 | [repository]: https://github.com/BiocPy/rds2py
371 | [issue tracker]: https://github.com/BiocPy/rds2py/issues
372 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2022 Genentech, Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/)
  2 | [![PyPI-Server](https://img.shields.io/pypi/v/rds2py.svg)](https://pypi.org/project/rds2py/)
  3 | ![Unit tests](https://github.com/BiocPy/rds2py/actions/workflows/run-tests.yml/badge.svg)
  4 | 
  5 | # rds2py
  6 | 
  7 | Parse and construct Python representations for datasets stored in RDS files. `rds2py` supports various base classes from R, and Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` S4 classes. **_For more details, check out [rds2cpp library](https://github.com/LTLA/rds2cpp)._**
  8 | 
  9 | ---
 10 | 
 11 | **Version 0.5.0** brings major changes to the package,
 12 | 
 13 | - Complete overhaul of the codebase using pybind11
 14 | - Streamlined readers for R data types
 15 | - Updated API for all classes and methods
 16 | 
 17 | Please refer to the [documentation](https://biocpy.github.io/rds2py/) for the latest usage guidelines. Previous versions may have incompatible APIs.
 18 | 
 19 | ---
 20 | 
 21 | The package provides:
 22 | 
 23 | - Efficient parsing of RDS files with _minimal_ memory overhead
 24 | - Support for R's basic data types and complex S4 objects
 25 |   - Vectors (numeric, character, logical)
 26 |   - Factors
 27 |   - Data frames
 28 |   - Matrices (dense and sparse)
 29 |   - Run-length encoded vectors (Rle)
 30 | - Conversion to appropriate Python/NumPy/SciPy data structures
 31 |   - dgCMatrix (sparse column matrix)
 32 |   - dgRMatrix (sparse row matrix)
 33 |   - dgTMatrix (sparse triplet matrix)
 34 | - Preservation of metadata and attributes from R objects
 35 | - Integration with BiocPy ecosystem for Bioconductor classes
 36 |   - SummarizedExperiment
 37 |   - RangedSummarizedExperiment
 38 |   - SingleCellExperiment
 39 |   - GenomicRanges
 40 |   - MultiAssayExperiment
 41 | 
 42 | ## Installation
 43 | 
 44 | Package is published to [PyPI](https://pypi.org/project/rds2py/)
 45 | 
 46 | ```shell
 47 | pip install rds2py
 48 | 
 49 | # or install optional dependencies
 50 | pip install rds2py[optional]
 51 | ```
 52 | 
 53 | By default, the package does not install packages to convert python representations to BiocPy classes. Please consider installing all optional dependencies.
 54 | 
 55 | ## Usage
 56 | 
 57 | If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases).
 58 | 
 59 | ```python
 60 | from rds2py import read_rds
 61 | r_obj = read_rds("path/to/file.rds")
 62 | ```
 63 | 
 64 | `read_rds` returns an instance of the appropriate Python class if a parser is already implemented for the object; otherwise it returns the dictionary containing the data from the RDS file.
 65 | 
 66 | In addition, the package provides the dictionary representation of the RDS file.
 67 | 
 68 | ```python
 69 | from rds2py import parse_rds
 70 | 
 71 | robject_dict = parse_rds("path/to/file.rds")
 72 | print(robject_dict)
 73 | ```
 74 | 
 75 | ### Write-your-own-reader
 76 | 
 77 | Reading RDS files as dictionary representations allows users to write their own custom readers into appropriate Python representations.
 78 | 
 79 | ```python
 80 | from rds2py import parse_rds
 81 | 
 82 | robject = parse_rds("path/to/file.rds")
 83 | print(robject)
 84 | ```
 85 | 
 86 | If you know this RDS file contains a `GenomicRanges` object, you can use the built-in reader or write your own reader to convert this dictionary.
 87 | 
 88 | ```python
 89 | from rds2py.read_granges import read_genomic_ranges
 90 | 
 91 | gr = read_genomic_ranges(robject)
 92 | print(gr)
 93 | ```
 94 | 
 95 | ## Type Conversion Reference
 96 | 
 97 | | R Type     | Python/NumPy Type                    |
 98 | | ---------- | ------------------------------------ |
 99 | | numeric    | numpy.ndarray (float64)              |
100 | | integer    | numpy.ndarray (int32)                |
101 | | character  | list of str                          |
102 | | logical    | numpy.ndarray (bool)                 |
103 | | factor     | list                                 |
104 | | data.frame | BiocFrame                            |
105 | | matrix     | numpy.ndarray or scipy.sparse matrix |
106 | | dgCMatrix  | scipy.sparse.csc_matrix              |
107 | | dgRMatrix  | scipy.sparse.csr_matrix              |
108 | 
109 | ## Developer Notes
110 | 
111 | This project uses pybind11 to provide bindings to the rds2cpp library. Please make sure a C++ compiler with C++17 support is installed on your system.
112 | 
113 | <!-- pyscaffold-notes -->
114 | 
115 | ## Note
116 | 
117 | This project has been set up using PyScaffold 4.5. For details and usage
118 | information on PyScaffold see https://pyscaffold.org/.
119 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | AUTODOCDIR    = api
11 | 
12 | # User-friendly check for sphinx-build
13 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $?), 1)
14 | $(error "The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from https://sphinx-doc.org/")
15 | endif
16 | 
17 | .PHONY: help clean Makefile
18 | 
19 | # Put it first so that "make" without argument is like "make help".
20 | help:
21 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 | 
23 | clean:
24 | 	rm -rf $(BUILDDIR)/* $(AUTODOCDIR)
25 | 
26 | # Catch-all target: route all unknown targets to Sphinx using the new
27 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
28 | %: Makefile
29 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
30 | 


--------------------------------------------------------------------------------
/docs/_static/.gitignore:
--------------------------------------------------------------------------------
1 | # Empty directory
2 | 


--------------------------------------------------------------------------------
/docs/authors.md:
--------------------------------------------------------------------------------
1 | ```{include} ../AUTHORS.md
2 | :relative-docs: docs/
3 | :relative-images:
4 | ```
5 | 


--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | ```{include} ../CHANGELOG.md
2 | :relative-docs: docs/
3 | :relative-images:
4 | ```
5 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # This file is execfile()d with the current directory set to its containing dir.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | #
  7 | # All configuration values have a default; values that are commented out
  8 | # serve to show the default.
  9 | 
 10 | import os
 11 | import sys
 12 | import shutil
 13 | 
 14 | # -- Path setup --------------------------------------------------------------
 15 | 
 16 | __location__ = os.path.dirname(__file__)
 17 | 
 18 | # If extensions (or modules to document with autodoc) are in another directory,
 19 | # add these directories to sys.path here. If the directory is relative to the
 20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 21 | sys.path.insert(0, os.path.join(__location__, "../src"))
 22 | 
 23 | # -- Run sphinx-apidoc -------------------------------------------------------
 24 | # This hack is necessary since RTD does not issue `sphinx-apidoc` before running
 25 | # `sphinx-build -b html . _build/html`. See Issue:
 26 | # https://github.com/readthedocs/readthedocs.org/issues/1139
 27 | # DON'T FORGET: Check the box "Install your project inside a virtualenv using
 28 | # setup.py install" in the RTD Advanced Settings.
 29 | # Additionally it helps us to avoid running apidoc manually
 30 | 
 31 | try:  # for Sphinx >= 1.7
 32 |     from sphinx.ext import apidoc
 33 | except ImportError:
 34 |     from sphinx import apidoc
 35 | 
 36 | output_dir = os.path.join(__location__, "api")
 37 | module_dir = os.path.join(__location__, "../src/rds2py")
 38 | try:
 39 |     shutil.rmtree(output_dir)
 40 | except FileNotFoundError:
 41 |     pass
 42 | 
 43 | try:
 44 |     import sphinx
 45 | 
 46 |     cmd_line = f"sphinx-apidoc --implicit-namespaces -f -o {output_dir} {module_dir}"
 47 | 
 48 |     args = cmd_line.split(" ")
 49 |     if tuple(sphinx.__version__.split(".")) >= ("1", "7"):
 50 |         # This is a rudimentary parse_version to avoid external dependencies
 51 |         args = args[1:]
 52 | 
 53 |     apidoc.main(args)
 54 | except Exception as e:
 55 |     print("Running `sphinx-apidoc` failed!\n{}".format(e))
 56 | 
 57 | # -- General configuration ---------------------------------------------------
 58 | 
 59 | # If your documentation needs a minimal Sphinx version, state it here.
 60 | # needs_sphinx = '1.0'
 61 | 
 62 | # Add any Sphinx extension module names here, as strings. They can be extensions
 63 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 64 | extensions = [
 65 |     "sphinx.ext.autodoc",
 66 |     "sphinx.ext.intersphinx",
 67 |     "sphinx.ext.todo",
 68 |     "sphinx.ext.autosummary",
 69 |     "sphinx.ext.viewcode",
 70 |     "sphinx.ext.coverage",
 71 |     "sphinx.ext.doctest",
 72 |     "sphinx.ext.ifconfig",
 73 |     "sphinx.ext.mathjax",
 74 |     "sphinx.ext.napoleon",
 75 |     "sphinx_autodoc_typehints",
 76 | ]
 77 | 
 78 | # Add any paths that contain templates here, relative to this directory.
 79 | templates_path = ["_templates"]
 80 | 
 81 | 
 82 | # Enable markdown
 83 | extensions.append("myst_parser")
 84 | 
 85 | # Configure MyST-Parser
 86 | myst_enable_extensions = [
 87 |     "amsmath",
 88 |     "colon_fence",
 89 |     "deflist",
 90 |     "dollarmath",
 91 |     "html_image",
 92 |     "linkify",
 93 |     "replacements",
 94 |     "smartquotes",
 95 |     "substitution",
 96 |     "tasklist",
 97 | ]
 98 | 
 99 | # The suffix of source filenames.
100 | source_suffix = [".rst", ".md"]
101 | 
102 | # The encoding of source files.
103 | # source_encoding = 'utf-8-sig'
104 | 
105 | # The master toctree document.
106 | master_doc = "index"
107 | 
108 | # General information about the project.
109 | project = "rds2py"
110 | copyright = "2023, jkanche"
111 | 
112 | # The version info for the project you're documenting, acts as replacement for
113 | # |version| and |release|, also used in various other places throughout the
114 | # built documents.
115 | #
116 | # version: The short X.Y version.
117 | # release: The full version, including alpha/beta/rc tags.
118 | # If you don’t need the separation provided between version and release,
119 | # just set them both to the same value.
# Prefer the version reported by the installed package; fall back to an empty
# string when the package cannot be imported.
try:
    from rds2py import __version__ as version
except ImportError:
    version = ""

# An empty or "unknown" version is replaced by the value Read the Docs exports.
_version_is_usable = bool(version) and version.lower() != "unknown"
if not _version_is_usable:
    version = os.getenv("READTHEDOCS_VERSION", "unknown")  # automatically set by RTD

# No separate short/full version is kept: both settings share one value.
release = version
129 | 
130 | # The language for content autogenerated by Sphinx. Refer to documentation
131 | # for a list of supported languages.
132 | # language = None
133 | 
134 | # There are two options for replacing |today|: either, you set today to some
135 | # non-false value, then it is used:
136 | # today = ''
137 | # Else, today_fmt is used as the format for a strftime call.
138 | # today_fmt = '%B %d, %Y'
139 | 
140 | # List of patterns, relative to source directory, that match files and
141 | # directories to ignore when looking for source files.
142 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ".venv"]
143 | 
144 | # The reST default role (used for this markup: `text`) to use for all documents.
145 | # default_role = None
146 | 
147 | # If true, '()' will be appended to :func: etc. cross-reference text.
148 | # add_function_parentheses = True
149 | 
150 | # If true, the current module name will be prepended to all description
151 | # unit titles (such as .. function::).
152 | # add_module_names = True
153 | 
154 | # If true, sectionauthor and moduleauthor directives will be shown in the
155 | # output. They are ignored by default.
156 | # show_authors = False
157 | 
158 | # The name of the Pygments (syntax highlighting) style to use.
159 | pygments_style = "sphinx"
160 | 
161 | # A list of ignored prefixes for module index sorting.
162 | # modindex_common_prefix = []
163 | 
164 | # If true, keep warnings as "system message" paragraphs in the built documents.
165 | # keep_warnings = False
166 | 
167 | # If this is True, todo emits a warning for each TODO entries. The default is False.
168 | todo_emit_warnings = True
169 | 
170 | autodoc_default_options = {
171 |     'special-members': True,
172 |     'undoc-members': False,
173 |     'exclude-members': '__weakref__, __dict__, __str__, __module__, __init__'
174 | }
175 | 
176 | autosummary_generate = True
177 | autosummary_imported_members = True
178 | 
179 | 
180 | # -- Options for HTML output -------------------------------------------------
181 | 
182 | # The theme to use for HTML and HTML Help pages.  See the documentation for
183 | # a list of builtin themes.
184 | html_theme = "furo"
185 | 
186 | # Theme options are theme-specific and customize the look and feel of a theme
187 | # further.  For a list of options available for each theme, see the
188 | # documentation.
189 | html_theme_options = {
190 |     "sidebar_width": "300px",
191 |     "page_width": "1200px"
192 | }
193 | 
194 | # Add any paths that contain custom themes here, relative to this directory.
195 | # html_theme_path = []
196 | 
197 | # The name for this set of Sphinx documents.  If None, it defaults to
198 | # "<project> v<release> documentation".
199 | # html_title = None
200 | 
201 | # A shorter title for the navigation bar.  Default is the same as html_title.
202 | # html_short_title = None
203 | 
204 | # The name of an image file (relative to this directory) to place at the top
205 | # of the sidebar.
206 | # html_logo = ""
207 | 
208 | # The name of an image file (within the static path) to use as favicon of the
209 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
210 | # pixels large.
211 | # html_favicon = None
212 | 
213 | # Add any paths that contain custom static files (such as style sheets) here,
214 | # relative to this directory. They are copied after the builtin static files,
215 | # so a file named "default.css" will overwrite the builtin "default.css".
216 | html_static_path = ["_static"]
217 | 
218 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
219 | # using the given strftime format.
220 | # html_last_updated_fmt = '%b %d, %Y'
221 | 
222 | # If true, SmartyPants will be used to convert quotes and dashes to
223 | # typographically correct entities.
224 | # html_use_smartypants = True
225 | 
226 | # Custom sidebar templates, maps document names to template names.
227 | # html_sidebars = {}
228 | 
229 | # Additional templates that should be rendered to pages, maps page names to
230 | # template names.
231 | # html_additional_pages = {}
232 | 
233 | # If false, no module index is generated.
234 | # html_domain_indices = True
235 | 
236 | # If false, no index is generated.
237 | # html_use_index = True
238 | 
239 | # If true, the index is split into individual pages for each letter.
240 | # html_split_index = False
241 | 
242 | # If true, links to the reST sources are added to the pages.
243 | # html_show_sourcelink = True
244 | 
245 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
246 | # html_show_sphinx = True
247 | 
248 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
249 | # html_show_copyright = True
250 | 
251 | # If true, an OpenSearch description file will be output, and all pages will
252 | # contain a <link> tag referring to it.  The value of this option must be the
253 | # base URL from which the finished HTML is served.
254 | # html_use_opensearch = ''
255 | 
256 | # This is the file name suffix for HTML files (e.g. ".xhtml").
257 | # html_file_suffix = None
258 | 
259 | # Output file base name for HTML help builder.
260 | htmlhelp_basename = "rds2py-doc"
261 | 
262 | 
263 | # -- Options for LaTeX output ------------------------------------------------
264 | 
265 | latex_elements = {
266 |     # The paper size ("letterpaper" or "a4paper").
267 |     # "papersize": "letterpaper",
268 |     # The font size ("10pt", "11pt" or "12pt").
269 |     # "pointsize": "10pt",
270 |     # Additional stuff for the LaTeX preamble.
271 |     # "preamble": "",
272 | }
273 | 
274 | # Grouping the document tree into LaTeX files. List of tuples
275 | # (source start file, target name, title, author, documentclass [howto/manual]).
276 | latex_documents = [
277 |     ("index", "user_guide.tex", "rds2py Documentation", "jkanche", "manual")
278 | ]
279 | 
280 | # The name of an image file (relative to this directory) to place at the top of
281 | # the title page.
282 | # latex_logo = ""
283 | 
284 | # For "manual" documents, if this is true, then toplevel headings are parts,
285 | # not chapters.
286 | # latex_use_parts = False
287 | 
288 | # If true, show page references after internal links.
289 | # latex_show_pagerefs = False
290 | 
291 | # If true, show URL addresses after external links.
292 | # latex_show_urls = False
293 | 
294 | # Documents to append as an appendix to all manuals.
295 | # latex_appendices = []
296 | 
297 | # If false, no module index is generated.
298 | # latex_domain_indices = True
299 | 
300 | # -- External mapping --------------------------------------------------------
# "<major>.<minor>" of the interpreter running the build; selects the matching
# Python documentation inventory below.
python_version = ".".join(map(str, sys.version_info[0:2]))

# Inventory name -> (base URL, inventory location). `None` makes Sphinx fetch
# `objects.inv` from the base URL.
intersphinx_mapping = {
    "sphinx": ("https://www.sphinx-doc.org/en/master", None),
    "python": ("https://docs.python.org/" + python_version, None),
    "matplotlib": ("https://matplotlib.org", None),
    "numpy": ("https://numpy.org/doc/stable", None),
    "sklearn": ("https://scikit-learn.org/stable", None),
    "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
    "scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
    "setuptools": ("https://setuptools.pypa.io/en/stable/", None),
    "pyscaffold": ("https://pyscaffold.org/en/stable", None),
    # Key was previously misspelled as "singelcellexperiment".
    "singlecellexperiment": ("https://biocpy.github.io/SingleCellExperiment", None),
    "summarizedexperiment": ("https://biocpy.github.io/SummarizedExperiment", None),
}
315 | 
316 | print(f"loading configurations for {project} {version} ...", file=sys.stderr)


--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | ```{include} ../CONTRIBUTING.md
2 | :relative-docs: docs/
3 | :relative-images:
4 | ```
5 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # rds2py
 2 | 
 3 | Parse, extract and create Python representations for datasets stored in RDS files. It supports Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` objects. This is possible because of [Aaron's rds2cpp library](https://github.com/LTLA/rds2cpp).
 4 | 
 5 | The package uses memory views (except for strings) so that we can access the same memory from C++ space in Python (through pybind11, of course). This is especially useful for large datasets so we don't make copies of data.
 6 | 
 7 | ## Install
 8 | 
 9 | Package is published to [PyPI](https://pypi.org/project/rds2py/)
10 | 
11 | ```shell
12 | pip install rds2py
13 | ```
14 | 
15 | ## Contents
16 | 
17 | ```{toctree}
18 | :maxdepth: 2
19 | 
20 | Overview <readme>
21 | Tutorial <tutorial>
22 | Contributions & Help <contributing>
23 | License <license>
24 | Authors <authors>
25 | Changelog <changelog>
26 | Module Reference <api/modules>
27 | ```
28 | 
29 | ## Indices and tables
30 | 
31 | * {ref}`genindex`
32 | * {ref}`modindex`
33 | * {ref}`search`
34 | 
35 | [Sphinx]: http://www.sphinx-doc.org/
36 | [Markdown]: https://daringfireball.net/projects/markdown/
37 | [reStructuredText]: http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
38 | [MyST]: https://myst-parser.readthedocs.io/en/latest/
39 | 


--------------------------------------------------------------------------------
/docs/license.md:
--------------------------------------------------------------------------------
1 | # License
2 | 
3 | ```{literalinclude} ../LICENSE.txt
4 | :language: text
5 | ```
6 | 


--------------------------------------------------------------------------------
/docs/readme.md:
--------------------------------------------------------------------------------
1 | ```{include} ../README.md
2 | :relative-docs: docs/
3 | :relative-images:
4 | ```
5 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | furo
2 | # Requirements file for ReadTheDocs, check .readthedocs.yml.
3 | # To build the module reference correctly, make sure every external package
4 | # under `install_requires` in `setup.cfg` is also listed here!
5 | # sphinx_rtd_theme
6 | myst-parser[linkify]
7 | sphinx>=3.2.1
8 | sphinx-autodoc-typehints
9 | 


--------------------------------------------------------------------------------
/docs/tutorial.md:
--------------------------------------------------------------------------------
 1 | # Tutorial
 2 | 
 3 | If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases).
 4 | 
 5 | ## Basic Usage
 6 | 
 7 | ```python
 8 | from rds2py import read_rds
 9 | r_obj = read_rds("path/to/file.rds")
10 | ```
11 | 
12 | `read_rds` returns an instance of the appropriate Python class if a parser is already implemented for the object; otherwise it returns the dictionary containing the data from the RDS file.
13 | 
14 | ## Write-your-own-reader
15 | 
16 | In addition, the package provides the dictionary representation of the RDS file, allowing users to write their own custom readers into appropriate Python representations.
17 | 
18 | ```python
19 | from rds2py import parse_rds
20 | 
21 | data = parse_rds("path/to/file.rds")
22 | print(data)
23 | 
24 | # now write your own parser to convert this dictionary.
25 | ```
26 | 
27 | ## Type Conversion Reference
28 | 
29 | | R Type | Python/NumPy Type |
30 | |--------|------------------|
31 | | numeric | numpy.ndarray (float64) |
32 | | integer | numpy.ndarray (int32) |
33 | | character | list of str |
34 | | logical | numpy.ndarray (bool) |
35 | | factor | list |
36 | | data.frame | BiocFrame |
37 | | matrix | numpy.ndarray or scipy.sparse matrix |
38 | | dgCMatrix | scipy.sparse.csc_matrix |
39 | | dgRMatrix | scipy.sparse.csr_matrix |
40 | 
41 | Check out the module reference for more information on these classes.
42 | 


--------------------------------------------------------------------------------
/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.24)
 2 | 
 3 | project(rds2py
 4 |     VERSION 1.0.0
 5 |     DESCRIPTION "Building the rds shared library"
 6 |     LANGUAGES CXX)
 7 | 
 8 | # Importing all of the dependencies with pinned versions (even for transitive dependencies).
 9 | include(FetchContent)
10 | 
11 | FetchContent_Declare(
12 |   rds2cpp
13 |   GIT_REPOSITORY https://github.com/LTLA/rds2cpp
14 |   GIT_TAG v1.1.0
15 | )
16 | 
17 | FetchContent_Declare(
18 |   byteme
19 |   GIT_REPOSITORY https://github.com/LTLA/byteme
20 |   GIT_TAG v1.2.2
21 | )
22 | 
23 | FetchContent_MakeAvailable(byteme)
24 | FetchContent_MakeAvailable(rds2cpp)
25 | 
26 | # Defining the targets.
27 | set(TARGET rds2py)
28 | 
29 | find_package(pybind11 CONFIG)
30 | 
31 | # pybind11 method:
32 | pybind11_add_module(${TARGET}
33 |     src/rdswrapper.cpp
34 | )
35 | 
36 | set_property(TARGET ${TARGET} PROPERTY CXX_STANDARD 17)
37 | 
38 | target_link_libraries(${TARGET} PRIVATE rds2cpp pybind11::pybind11)
39 | 
40 | set_target_properties(${TARGET} PROPERTIES
41 |     OUTPUT_NAME lib_rds_parser
42 |     PREFIX ""
43 | )
44 | 


--------------------------------------------------------------------------------
/lib/src/rdswrapper.cpp:
--------------------------------------------------------------------------------
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
#include <pybind11/iostream.h>
#include <rds2cpp/rds2cpp.hpp>

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
  7 | 
  8 | namespace py = pybind11;
  9 | 
 10 | class RdsReader {
 11 | private:
 12 |     const rds2cpp::RObject* ptr;
 13 | 
 14 | public:
 15 |     RdsReader(const rds2cpp::RObject* p) : ptr(p) {
 16 |         if (!p) throw std::runtime_error("Null pointer passed to 'RdsReader'.");
 17 |     }
 18 | 
 19 |     std::string get_rtype() const {
 20 |         if (!ptr) throw std::runtime_error("Null pointer in 'get_rtype'.");
 21 |         // py::print("arg::", static_cast<int>(ptr->type()));
 22 |         switch (ptr->type()) {
 23 |             case rds2cpp::SEXPType::S4: return "S4";
 24 |             case rds2cpp::SEXPType::INT: return "integer";
 25 |             case rds2cpp::SEXPType::REAL: return "double";
 26 |             case rds2cpp::SEXPType::STR: return "string";
 27 |             case rds2cpp::SEXPType::LGL: return "boolean";
 28 |             case rds2cpp::SEXPType::VEC: return "vector";
 29 |             case rds2cpp::SEXPType::NIL: return "null";
 30 |             default: return "other";
 31 |         }
 32 |     }
 33 | 
 34 |     int get_rsize() const {
 35 |         if (!ptr) throw std::runtime_error("Null pointer in 'get_rsize'.");
 36 |         switch (ptr->type()) {
 37 |             case rds2cpp::SEXPType::INT: return static_cast<const rds2cpp::IntegerVector*>(ptr)->data.size();
 38 |             case rds2cpp::SEXPType::REAL: return static_cast<const rds2cpp::DoubleVector*>(ptr)->data.size();
 39 |             case rds2cpp::SEXPType::STR: return static_cast<const rds2cpp::StringVector*>(ptr)->data.size();
 40 |             case rds2cpp::SEXPType::LGL: return static_cast<const rds2cpp::LogicalVector*>(ptr)->data.size();
 41 |             case rds2cpp::SEXPType::VEC: return static_cast<const rds2cpp::GenericVector*>(ptr)->data.size();
 42 |             default: return -1;
 43 |         }
 44 |     }
 45 | 
 46 |     py::array get_numeric_data() const {
 47 |         if (!ptr) throw std::runtime_error("Null pointer in 'get_numeric_data'.");
 48 |         switch (ptr->type()) {
 49 |             case rds2cpp::SEXPType::INT: {
 50 |                 const auto& data = static_cast<const rds2cpp::IntegerVector*>(ptr)->data;
 51 |                 return py::array_t<int32_t>({data.size()}, {sizeof(int32_t)}, data.data());
 52 |             }
 53 |             case rds2cpp::SEXPType::LGL: {
 54 |                 const auto& data = static_cast<const rds2cpp::LogicalVector*>(ptr)->data;
 55 |                 return py::array_t<int32_t>({data.size()}, {sizeof(int32_t)}, data.data());
 56 |             }
 57 |             case rds2cpp::SEXPType::REAL: {
 58 |                 const auto& data = static_cast<const rds2cpp::DoubleVector*>(ptr)->data;
 59 |                 return py::array_t<double>({data.size()}, {sizeof(double)}, data.data());
 60 |             }
 61 |             default:
 62 |                 throw std::runtime_error("Invalid type for numeric data");
 63 |         }
 64 |     }
 65 | 
 66 |     py::list get_string_arr() const {
 67 |         if (!ptr) throw std::runtime_error("Null pointer in 'get_string_arr'.");
 68 |         if (ptr->type() != rds2cpp::SEXPType::STR) {
 69 |             throw std::runtime_error("Invalid type for 'string_arr'");
 70 |         }
 71 |         const auto& data = static_cast<const rds2cpp::StringVector*>(ptr)->data;
 72 |         return py::cast(data);
 73 |     }
 74 | 
 75 |     py::list get_attribute_names() const {
 76 |         if (!ptr) throw std::runtime_error("Null pointer in 'get_attribute_names'");
 77 |         return py::cast(get_attributes().names);
 78 |     }
 79 | 
 80 |     py::object load_attribute_by_name(const std::string& name) const {
 81 |         if (!ptr) throw std::runtime_error("Null pointer in 'load_attribute_by_name'");
 82 |         const auto& attributes = get_attributes();
 83 |         auto it = std::find(attributes.names.begin(), attributes.names.end(), name);
 84 |         if (it == attributes.names.end()) {
 85 |             throw std::runtime_error("Attribute not found: " + name);
 86 |         }
 87 |         size_t index = std::distance(attributes.names.begin(), it);
 88 |         return py::cast(new RdsReader(attributes.values[index].get()));
 89 |     }
 90 | 
 91 |     py::object load_vec_element(int index) const {
 92 |         if (!ptr) throw std::runtime_error("Null pointer in 'load_vec_element'");
 93 |         if (ptr->type() != rds2cpp::SEXPType::VEC) {
 94 |             throw std::runtime_error("Not a vector type");
 95 |         }
 96 |         const auto& data = static_cast<const rds2cpp::GenericVector*>(ptr)->data;
 97 |         if (index < 0 || static_cast<size_t>(index) >= data.size()) {
 98 |             throw std::out_of_range("Vector index out of range");
 99 |         }
100 |         return py::cast(new RdsReader(data[index].get()));
101 |     }
102 | 
103 |     std::string get_package_name() const {
104 |         if (!ptr || ptr->type() != rds2cpp::SEXPType::S4) {
105 |             throw std::runtime_error("Not an S4 object");
106 |         }
107 |         return static_cast<const rds2cpp::S4Object*>(ptr)->package_name;
108 |     }
109 | 
110 |     std::string get_class_name() const {
111 |         if (!ptr || ptr->type() != rds2cpp::SEXPType::S4) {
112 |             throw std::runtime_error("Not an S4 object");
113 |         }
114 |         return static_cast<const rds2cpp::S4Object*>(ptr)->class_name;
115 |     }
116 | 
117 |     std::pair<size_t, size_t> get_dimensions() const {
118 |         if (!ptr || ptr->type() != rds2cpp::SEXPType::INT) {
119 |             throw std::runtime_error("Dimensions must be integer");
120 |         }
121 |         const auto& dims = static_cast<const rds2cpp::IntegerVector*>(ptr)->data;
122 |         if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) {
123 |             throw std::runtime_error("Invalid dimensions");
124 |         }
125 |         return {static_cast<size_t>(dims[0]), static_cast<size_t>(dims[1])};
126 |     }
127 | 
128 | private:
129 |     const rds2cpp::Attributes& get_attributes() const {
130 |         // Each supported concrete type stores its own `attributes`; select it by the runtime type tag.
131 |         if (ptr == nullptr) throw std::runtime_error("Null pointer in get_attributes");
132 |         const auto tag = ptr->type();
133 |         if (tag == rds2cpp::SEXPType::INT) return static_cast<const rds2cpp::IntegerVector*>(ptr)->attributes;
134 |         if (tag == rds2cpp::SEXPType::REAL) return static_cast<const rds2cpp::DoubleVector*>(ptr)->attributes;
135 |         if (tag == rds2cpp::SEXPType::LGL) return static_cast<const rds2cpp::LogicalVector*>(ptr)->attributes;
136 |         if (tag == rds2cpp::SEXPType::STR) return static_cast<const rds2cpp::StringVector*>(ptr)->attributes;
137 |         if (tag == rds2cpp::SEXPType::VEC) return static_cast<const rds2cpp::GenericVector*>(ptr)->attributes;
138 |         if (tag == rds2cpp::SEXPType::S4) return static_cast<const rds2cpp::S4Object*>(ptr)->attributes;
139 |         throw std::runtime_error("Unsupported type for attributes");
140 |     }
141 | };
142 | 
143 | // Owns the parsed RDS tree together with a reader positioned on its root object.
144 | class RdsObject {
145 |     std::unique_ptr<rds2cpp::Parsed> parsed;  // declared first, so it is destroyed after `reader`
146 |     std::unique_ptr<RdsReader> reader;
147 | 
148 | public:
149 |     RdsObject(const std::string& file) {
150 |         try {
151 |             parsed.reset(new rds2cpp::Parsed(rds2cpp::parse_rds(file)));
152 |             const bool has_root = parsed && parsed->object;
153 |             if (!has_root) throw std::runtime_error("Failed to parse RDS file");
154 |             reader.reset(new RdsReader(parsed->object.get()));
155 |         } catch (const std::exception& err) {
156 |             const std::string prefix = "Error in 'RdsObject' constructor: ";
157 |             throw std::runtime_error(prefix + err.what());
158 |         }
159 |     }
160 | 
161 |     RdsReader* get_robject() const {
162 |         if (reader == nullptr) throw std::runtime_error("Null reader in 'get_robject'");
163 |         return reader.get();
164 |     }
165 | };
166 | 
167 | PYBIND11_MODULE(lib_rds_parser, m) {
168 |     py::register_exception<std::runtime_error>(m, "RdsParserError");
169 | 
170 |     // The root reader is owned by RdsObject; reference_internal ties its lifetime to that parent.
171 |     py::class_<RdsObject>(m, "RdsObject")
172 |         .def(py::init<const std::string&>())
173 |         .def("get_robject", &RdsObject::get_robject, py::return_value_policy::reference_internal);
174 | 
175 |     py::class_<RdsReader>(m, "RdsReader")
176 |         .def(py::init<const rds2cpp::RObject*>())
177 |         .def("get_rtype", &RdsReader::get_rtype)
178 |         .def("get_rsize", &RdsReader::get_rsize)
179 |         .def("get_numeric_data", &RdsReader::get_numeric_data)
180 |         .def("get_string_arr", &RdsReader::get_string_arr)
181 |         .def("get_attribute_names", &RdsReader::get_attribute_names)
182 |         // Child readers wrap raw pointers into the tree reached through `self`, so `self`
183 |         // (index 1) is kept alive for as long as the returned reader (index 0) exists.
184 |         // Without this, dropping the owning RdsObject would leave children dangling.
185 |         .def("load_attribute_by_name", &RdsReader::load_attribute_by_name, py::keep_alive<0, 1>())
186 |         .def("load_vec_element", &RdsReader::load_vec_element, py::keep_alive<0, 1>())
187 |         .def("get_package_name", &RdsReader::get_package_name)
188 |         .def("get_class_name", &RdsReader::get_class_name)
189 |         .def("get_dimensions", &RdsReader::get_dimensions);
190 | }
191 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | # AVOID CHANGING REQUIRES: IT WILL BE UPDATED BY PYSCAFFOLD!
 3 | requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "cmake", "pybind11", "numpy"]
 4 | build-backend = "setuptools.build_meta"
 5 | 
 6 | [tool.setuptools_scm]
 7 | # See configuration details in https://github.com/pypa/setuptools_scm
 8 | version_scheme = "no-guess-dev"
 9 | 
10 | [tool.ruff]
11 | line-length = 120
12 | src = ["src"]
13 | exclude = ["tests"]
14 | extend-ignore = ["F821"]
15 | 
16 | [tool.ruff.pydocstyle]
17 | convention = "google"
18 | 
19 | [tool.ruff.per-file-ignores]
20 | "__init__.py" = ["E402", "F401"]
21 | 
22 | [tool.black]
23 | force-exclude = "__init__.py"
24 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
  1 | # This file is used to configure your project.
  2 | # Read more about the various options under:
  3 | # https://setuptools.pypa.io/en/latest/userguide/declarative_config.html
  4 | # https://setuptools.pypa.io/en/latest/references/keywords.html
  5 | 
  6 | [metadata]
  7 | name = rds2py
  8 | description = Parse and construct Python representations for datasets stored in RDS files
  9 | author = jkanche
 10 | author_email = jayaram.kancherla@gmail.com
 11 | license = MIT
 12 | license_files = LICENSE.txt
 13 | long_description = file: README.md
 14 | long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
 15 | url = https://github.com/biocpy/rds2py
 16 | # Add here related links, for example:
 17 | project_urls =
 18 |     Documentation = https://biocpy.github.io/rds2py/
 19 |     Source = https://github.com/biocpy/rds2py
 20 | #    Changelog = https://pyscaffold.org/en/latest/changelog.html
 21 | #    Tracker = https://github.com/pyscaffold/pyscaffold/issues
 22 | #    Conda-Forge = https://anaconda.org/conda-forge/pyscaffold
 23 | #    Download = https://pypi.org/project/PyScaffold/#files
 24 | #    Twitter = https://twitter.com/PyScaffold
 25 | 
 26 | # Change if running only on Windows, Mac or Linux (comma-separated)
 27 | platforms = Mac, Linux
 28 | 
 29 | # Add here all kinds of additional classifiers as defined under
 30 | # https://pypi.org/classifiers/
 31 | classifiers =
 32 |     Development Status :: 4 - Beta
 33 |     Programming Language :: Python
 34 | 
 35 | 
 36 | [options]
 37 | zip_safe = False
 38 | packages = find_namespace:
 39 | include_package_data = True
 40 | package_dir =
 41 |     =src
 42 | 
 43 | # Require a min/specific Python version (comma-separated conditions)
 44 | python_requires = >=3.9
 45 | 
 46 | # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 47 | # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
 48 | # new major versions. This works if the required packages follow Semantic Versioning.
 49 | # For more information, check out https://semver.org/.
 50 | install_requires =
 51 |     importlib-metadata; python_version<"3.8"
 52 |     numpy
 53 |     biocutils>=0.1.5
 54 | 
 55 | [options.packages.find]
 56 | where = src
 57 | exclude =
 58 |     tests
 59 | 
 60 | [options.extras_require]
 61 | # Add here additional requirements for extra features, to install with:
 62 | # `pip install rds2py[PDF]` like:
 63 | # PDF = ReportLab; RXP
 64 | optional =
 65 |     pandas
 66 |     hdf5array
 67 |     scipy
 68 |     biocframe
 69 |     genomicranges>=0.4.9
 70 |     summarizedexperiment>=0.4.1
 71 |     singlecellexperiment>=0.4.1
 72 |     multiassayexperiment
 73 | 
 74 | # Add here test requirements (semicolon/line-separated)
 75 | testing =
 76 |     setuptools
 77 |     pytest
 78 |     pytest-cov
 79 |     %(optional)s
 80 | 
 81 | [options.entry_points]
 82 | # Add here console scripts like:
 83 | # console_scripts =
 84 | #     script_name = rds2py.module:function
 85 | # For example:
 86 | # console_scripts =
 87 | #     fibonacci = rds2py.skeleton:run
 88 | # And any other entry points, for example:
 89 | # pyscaffold.cli =
 90 | #     awesome = pyscaffoldext.awesome.extension:AwesomeExtension
 91 | 
 92 | [tool:pytest]
 93 | # Specify command line options as you would do when invoking pytest directly.
 94 | # e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml
 95 | # in order to write a coverage file that can be read by Jenkins.
 96 | # CAUTION: --cov flags may prohibit setting breakpoints while debugging.
 97 | #          Comment those flags to avoid this pytest issue.
 98 | addopts =
 99 |     --cov rds2py --cov-report term-missing
100 |     --verbose
101 | norecursedirs =
102 |     dist
103 |     build
104 |     .tox
105 | testpaths = tests
106 | # Use pytest markers to select/deselect specific tests
107 | # markers =
108 | #     slow: mark tests as slow (deselect with '-m "not slow"')
109 | #     system: mark end-to-end system tests
110 | 
111 | [devpi:upload]
112 | # Options for the devpi: PyPI server and packaging tool
113 | # VCS export must be deactivated since we are using setuptools-scm
114 | no_vcs = 1
115 | formats = bdist_wheel
116 | 
117 | [flake8]
118 | # Some sane defaults for the code style checker flake8
119 | max_line_length = 100
120 | extend_ignore = E203, W503
121 | # ^  Black-compatible
122 | #    E203 and W503 have edge cases handled by black
123 | exclude =
124 |     .tox
125 |     build
126 |     dist
127 |     .eggs
128 |     docs/conf.py
129 | per-file-ignores = __init__.py:F401
130 | 
131 | [pyscaffold]
132 | # PyScaffold's parameters when the project was created.
133 | # This will be used when updating. Do not change!
134 | version = 4.5
135 | package = rds2py
136 | extensions =
137 |     markdown
138 |     pre_commit
139 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """Setup file for rds2py. Use setup.cfg to configure your project.
 2 | 
 3 | This file was generated with PyScaffold 4.5.
 4 | PyScaffold helps you to put up the scaffold of your new Python project.
 5 | Learn more under: https://pyscaffold.org/
 6 | """
 7 | 
 8 | from setuptools import setup, Extension
 9 | from setuptools.command.build_ext import build_ext as build_ext_orig
10 | import pathlib
11 | import os
12 | import shutil
13 | import sys
14 | import pybind11
15 | 
16 | 
17 | ###  Adapted from dolomite_base: https://github.com/ArtifactDB/dolomite-base/blob/master/setup.py
18 | ## Adapted from https://stackoverflow.com/questions/42585210/extending-setuptools-extension-to-use-cmake-in-setup-py.
19 | class CMakeExtension(Extension):
20 |     def __init__(self, name):
21 |         super().__init__(name, sources=[])
22 | 
23 | 
24 | class build_ext(build_ext_orig):
25 |     def run(self):
26 |         for ext in self.extensions:
27 |             self.build_cmake(ext)
28 | 
29 |     def build_cmake(self, ext):
30 |         build_temp = pathlib.Path(self.build_temp)
31 |         build_lib = pathlib.Path(self.build_lib)
32 |         outpath = os.path.join(build_lib.absolute(), ext.name)
33 | 
34 |         if not os.path.exists(build_temp):
35 |             cmd = [
36 |                 "cmake",
37 |                 "-S",
38 |                 "lib",
39 |                 "-B",
40 |                 build_temp,
41 |                 "-Dpybind11_DIR=" + os.path.join(os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11"),
42 |                 "-DPYTHON_EXECUTABLE=" + sys.executable,
43 |             ]
44 |             if os.name != "nt":
45 |                 cmd.append("-DCMAKE_BUILD_TYPE=Release")
46 |                 cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath)
47 | 
48 |             if "MORE_CMAKE_OPTIONS" in os.environ:
49 |                 cmd += os.environ["MORE_CMAKE_OPTIONS"].split()
50 |             self.spawn(cmd)
51 | 
52 |         if not self.dry_run:
53 |             cmd = ["cmake", "--build", build_temp]
54 |             if os.name == "nt":
55 |                 cmd += ["--config", "Release"]
56 |             self.spawn(cmd)
57 |             if os.name == "nt":
58 |                 # Gave up trying to get MSVC to respect the output directory.
59 |                 # Delvewheel also needs it to have a 'pyd' suffix... whatever.
60 |                 shutil.copyfile(
61 |                     os.path.join(build_temp, "Release", "_core.dll"),
62 |                     os.path.join(outpath, "_core.pyd"),
63 |                 )
64 | 
65 | 
66 | if __name__ == "__main__":
67 |     try:
68 |         setup(
69 |             use_scm_version={"version_scheme": "no-guess-dev"},
70 |             ext_modules=[CMakeExtension("rds2py")],
71 |             cmdclass={"build_ext": build_ext},
72 |         )
73 |     except:  # noqa
74 |         print(
75 |             "\n\nAn error occurred while building the project, "
76 |             "please ensure you have the most updated version of setuptools, "
77 |             "setuptools_scm and wheel with:\n"
78 |             "   pip install -U setuptools setuptools_scm wheel\n\n"
79 |         )
80 |         raise
81 | 


--------------------------------------------------------------------------------
/src/rds2py/PyRdsReader.py:
--------------------------------------------------------------------------------
  1 | """Low-level interface for reading RDS file format.
  2 | 
  3 | This module provides the core functionality for parsing RDS files at a binary level and converting them into a
  4 | dictionary representation that can be further processed by higher-level functions.
  5 | """
  6 | 
  7 | from typing import Any, Dict, List, Union
  8 | from warnings import warn
  9 | 
 10 | import numpy as np
 11 | 
 12 | from .lib_rds_parser import RdsObject, RdsReader
 13 | 
 14 | 
 15 | class PyRdsParserError(Exception):
 16 |     """Exception raised for errors during RDS parsing."""
 17 | 
 18 |     pass
 19 | 
 20 | 
 21 | class PyRdsParser:
 22 |     """Parser for reading RDS files.
 23 | 
 24 |     This class provides low-level access to RDS file contents, handling the binary
 25 |     format and converting it into Python data structures. It supports various R
 26 |     data types and handles special R cases like NA values, integer sequences and
 27 |     range functions.
 28 | 
 29 |     Attributes:
 30 |         R_MIN:
 31 |             Minimum integer value in R, used for handling NA values.
 32 | 
 33 |         rds_object:
 34 |             Internal representation of the RDS file.
 35 | 
 36 |         root_object:
 37 |             Root object of the parsed RDS file.
 38 |     """
 39 | 
 40 |     R_MIN: int = -2147483648
 41 | 
 42 |     def __init__(self, file_path: str):
 43 |         """Initialize the class.
 44 | 
 45 |         Args:
 46 |             file_path:
 47 |                 Path to the RDS file to be read.
 48 |         """
 49 |         try:
 50 |             self.rds_object = RdsObject(file_path)
 51 |             robject = self.rds_object.get_robject()
 52 | 
 53 |             if not isinstance(robject, RdsReader):
 54 |                 raise TypeError(f"Expected 'RdsReader' object, got {type(robject)}")
 55 | 
 56 |             self.root_object = robject
 57 |         except Exception as e:
 58 |             raise PyRdsParserError(f"Error initializing 'PyRdsParser': {str(e)}")
 59 | 
 60 |     def parse(self) -> Dict[str, Any]:
 61 |         """Parse the entire RDS file into a dictionary structure.
 62 | 
 63 |         Returns:
 64 |             A dictionary containing the parsed data with keys:
 65 |             - 'type': The R object type
 66 |             - 'data': The actual data (if applicable)
 67 |             - 'attributes': R object attributes (if any)
 68 |             - 'class_name': The R class name
 69 |             - Additional keys depending on the object type
 70 | 
 71 |         Raises:
 72 |             PyRdsParserError: If there's an error during parsing.
 73 |         """
 74 |         try:
 75 |             return self._process_object(self.root_object)
 76 |         except Exception as e:
 77 |             raise PyRdsParserError(f"Error parsing RDS object: {str(e)}")
 78 | 
 79 |     def _process_object(self, obj: RdsReader) -> Dict[str, Any]:
 80 |         try:
 81 |             rtype = obj.get_rtype()
 82 |             result: Dict[str, Any] = {"type": rtype}
 83 | 
 84 |             if rtype == "S4":
 85 |                 result["package_name"] = obj.get_package_name()
 86 |                 result["class_name"] = obj.get_class_name()
 87 |                 result["attributes"] = self._process_attributes(obj)
 88 |             elif rtype in ["integer", "boolean", "double"]:
 89 |                 result["data"] = self._handle_r_special_cases(
 90 |                     self._get_numeric_data(obj, rtype), rtype, obj.get_rsize()
 91 |                 )
 92 |                 result["attributes"] = self._process_attributes(obj)
 93 |                 result["class_name"] = f"{rtype}_vector"
 94 |             elif rtype == "string":
 95 |                 result["data"] = obj.get_string_arr()
 96 |                 result["class_name"] = "string_vector"
 97 |             elif rtype == "vector":
 98 |                 result["data"] = self._process_vector(obj)
 99 |                 result["attributes"] = self._process_attributes(obj)
100 |                 result["class_name"] = "vector"
101 |             elif rtype == "null":
102 |                 pass
103 |             else:
104 |                 # raise ValueError
105 |                 warn(f"Unsupported R object type: {rtype}", RuntimeWarning)
106 |                 result["data"] = None
107 |                 result["attributes"] = None
108 |                 result["class_name"] = None
109 | 
110 |             return result
111 |         except Exception as e:
112 |             raise PyRdsParserError(f"Error processing object: {str(e)}")
113 | 
114 |     def _handle_r_special_cases(self, data: np.ndarray, rtype: str, size: int) -> Union[np.ndarray, range]:
115 |         """Handle special R data representations."""
116 |         try:
117 |             # Special handling for R integer containing NA
118 |             if size != 2:
119 |                 if any(data == self.R_MIN):
120 |                     return np.array([np.nan if x == self.R_MIN else x for x in data])
121 | 
122 |             # Special handling for R integer sequences
123 |             if rtype == "integer" and size == 2 and data[0] == self.R_MIN and data[1] < 0:
124 |                 if data[1] == self.R_MIN:
125 |                     return [None, None]
126 |                 return range(data[1] * -1)
127 | 
128 |             return data
129 |         except Exception as e:
130 |             raise PyRdsParserError(f"Error handling R special cases: {str(e)}")
131 | 
132 |     def _get_numeric_data(self, obj: RdsReader, rtype: str) -> np.ndarray:
133 |         try:
134 |             data = obj.get_numeric_data()
135 |             if rtype == "boolean":
136 |                 return data.astype(bool)
137 | 
138 |             return data
139 |         except Exception as e:
140 |             raise PyRdsParserError(f"Error getting numeric data: {str(e)}")
141 | 
142 |     def _process_vector(self, obj: RdsReader) -> List[Dict[str, Any]]:
143 |         return [self._process_object(obj.load_vec_element(i)) for i in range(obj.get_rsize())]
144 | 
145 |     def _process_attributes(self, obj: RdsReader) -> Dict[str, Dict[str, Any]]:
146 |         try:
147 |             attributes = {}
148 |             for name in obj.get_attribute_names():
149 |                 attr_obj = obj.load_attribute_by_name(name)
150 |                 attributes[name] = self._process_object(attr_obj)
151 | 
152 |             return attributes
153 |         except Exception as e:
154 |             raise PyRdsParserError(f"Error processing attributes: {str(e)}")
155 | 
156 |     def get_dimensions(self) -> Union[tuple, None]:
157 |         try:
158 |             return self.root_object.get_dimensions()
159 |         except Exception as e:
160 |             raise PyRdsParserError(f"Error getting dimensions: {str(e)}")
161 | 


--------------------------------------------------------------------------------
/src/rds2py/__init__.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | if sys.version_info[:2] >= (3, 8):
 4 |     # TODO: Import directly (no need for conditional) when `python_requires = >= 3.8`
 5 |     from importlib.metadata import PackageNotFoundError, version  # pragma: no cover
 6 | else:
 7 |     from importlib_metadata import PackageNotFoundError, version  # pragma: no cover
 8 | 
 9 | try:
10 |     # Change here if project is renamed and does not equal the package name
11 |     dist_name = __name__
12 |     __version__ = version(dist_name)
13 | except PackageNotFoundError:  # pragma: no cover
14 |     __version__ = "unknown"
15 | finally:
16 |     del version, PackageNotFoundError
17 | 
18 | from .generics import read_rds
19 | from .rdsutils import parse_rds
20 | 


--------------------------------------------------------------------------------
/src/rds2py/generics.py:
--------------------------------------------------------------------------------
  1 | """Core functionality for reading RDS files in Python.
  2 | 
  3 | This module provides the main interface for reading RDS files and converting them
  4 | to appropriate Python objects. It maintains a registry of supported R object types
  5 | and their corresponding Python parser functions.
  6 | 
  7 | The module supports various R object types including vectors, matrices, data frames,
  8 | and specialized Bioconductor objects like GenomicRanges and SummarizedExperiment.
  9 | 
 10 | Example:
 11 | 
 12 |     .. code-block:: python
 13 | 
 14 |         data = read_rds("example.rds")
 15 |         print(type(data))
 16 | """
 17 | 
 18 | from importlib import import_module
 19 | from warnings import warn
 20 | 
 21 | from .rdsutils import get_class, parse_rds
 22 | 
 23 | __author__ = "jkanche"
 24 | __copyright__ = "jkanche"
 25 | __license__ = "MIT"
 26 | 
 27 | REGISTRY = {
 28 |     # typed vectors
 29 |     "integer_vector": "rds2py.read_atomic.read_integer_vector",
 30 |     "boolean_vector": "rds2py.read_atomic.read_boolean_vector",
 31 |     "string_vector": "rds2py.read_atomic.read_string_vector",
 32 |     "double_vector": "rds2py.read_atomic.read_double_vector",
 33 |     # dictionary
 34 |     "vector": "rds2py.read_dict.read_dict",
 35 |     # factors
 36 |     "factor": "rds2py.read_factor.read_factor",
 37 |     # Rle
 38 |     "Rle": "rds2py.read_rle.read_rle",
 39 |     # matrices
 40 |     "dgCMatrix": "rds2py.read_matrix.read_dgcmatrix",
 41 |     "dgRMatrix": "rds2py.read_matrix.read_dgrmatrix",
 42 |     "dgTMatrix": "rds2py.read_matrix.read_dgtmatrix",
 43 |     "ndarray": "rds2py.read_matrix.read_ndarray",
 44 |     # data frames
 45 |     "data.frame": "rds2py.read_frame.read_data_frame",
 46 |     "DFrame": "rds2py.read_frame.read_dframe",
 47 |     # genomic ranges
 48 |     "GRanges": "rds2py.read_granges.read_genomic_ranges",
 49 |     "GenomicRanges": "rds2py.read_granges.read_genomic_ranges",
 50 |     "CompressedGRangesList": "rds2py.read_granges.read_granges_list",
 51 |     "GRangesList": "rds2py.read_granges.read_granges_list",
 52 |     # summarized experiment
 53 |     "SummarizedExperiment": "rds2py.read_se.read_summarized_experiment",
 54 |     "RangedSummarizedExperiment": "rds2py.read_se.read_ranged_summarized_experiment",
 55 |     # single-cell experiment
 56 |     "SingleCellExperiment": "rds2py.read_sce.read_single_cell_experiment",
 57 |     "SummarizedExperimentByColumn": "rds2py.read_sce.read_alts_summarized_experiment_by_column",
 58 |     # multi assay experiment
 59 |     "MultiAssayExperiment": "rds2py.read_mae.read_multi_assay_experiment",
 60 |     "ExperimentList": "rds2py.read_dict.read_dict",
 61 |     # delayed matrices
 62 |     "H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse",
 63 | }
 64 | 
 65 | 
 66 | # @singledispatch
 67 | # def save_rds(x, path: str):
 68 | #     """Save a Python object as RDS file.
 69 | 
 70 | #     Args:
 71 | #         x:
 72 | #             Object to save.
 73 | 
 74 | #         path:
 75 | #             Path to save the object.
 76 | #     """
 77 | #     raise NotImplementedError(
 78 | #         f"No `save_rds` method implemented for '{type(x).__name__}' objects."
 79 | #     )
 80 | 
 81 | 
 82 | def read_rds(path: str, **kwargs):
 83 |     """Read an RDS file and convert it to an appropriate Python object.
 84 | 
 85 |     Args:
 86 |         path:
 87 |             Path to the RDS file to be read.
 88 | 
 89 |         **kwargs:
 90 |             Additional arguments passed to specific parser functions.
 91 | 
 92 |     Returns:
 93 |         A Python object representing the data in the RDS file. The exact type
 94 |         depends on the contents of the RDS file and the available parsers.
 95 |     """
 96 |     _robj = parse_rds(path=path)
 97 |     return _dispatcher(_robj, **kwargs)
 98 | 
 99 | 
100 | def _dispatcher(robject: dict, **kwargs):
101 |     """Internal function to dispatch R objects to appropriate parser functions.
102 | 
103 |     Args:
104 |         robject:
105 |             Dictionary containing parsed R object data.
106 | 
107 |         **kwargs:
108 |             Additional arguments passed to specific parser functions.
109 | 
110 |     Returns:
111 |         Parsed Python object corresponding to the R data structure.
112 |         Returns the original dictionary if no appropriate parser is found.
113 |     """
114 |     _class_name = get_class(robject)
115 | 
116 |     if _class_name is None:
117 |         return None
118 | 
119 |     # if a class is registered, coerce the object
120 |     # to the representation.
121 |     if _class_name in REGISTRY:
122 |         try:
123 |             command = REGISTRY[_class_name]
124 |             if isinstance(command, str):
125 |                 last_period = command.rfind(".")
126 |                 mod = import_module(command[:last_period])
127 |                 command = getattr(mod, command[last_period + 1 :])
128 |                 REGISTRY[_class_name] = command
129 | 
130 |             return command(robject, **kwargs)
131 |         except Exception as e:
132 |             warn(
133 |                 f"Failed to coerce RDS object to class: '{_class_name}', returning the dictionary, {str(e)}",
134 |                 RuntimeWarning,
135 |             )
136 |     else:
137 |         warn(
138 |             f"RDS file contains an unknown class: '{_class_name}', returning the dictionary",
139 |             RuntimeWarning,
140 |         )
141 | 
142 |     return robject
143 | 


--------------------------------------------------------------------------------
/src/rds2py/rdsutils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for RDS file parsing and class inference.
 2 | 
 3 | This module provides helper functions for parsing RDS files and inferring the appropriate R class information from
 4 | parsed objects.
 5 | """
 6 | 
 7 | from .PyRdsReader import PyRdsParser
 8 | 
 9 | __author__ = "jkanche"
10 | __copyright__ = "jkanche"
11 | __license__ = "MIT"
12 | 
13 | 
14 | def parse_rds(path: str) -> dict:
15 |     """Parse an RDS file into a dictionary representation.
16 | 
17 |     Args:
18 |         path:
19 |             Path to the RDS file to be parsed.
20 | 
21 |     Returns:
22 |         A dictionary containing the parsed contents of the RDS file.
23 |         The structure depends on the type of R object stored in the file.
24 |     """
25 |     parsed_obj = PyRdsParser(path)
26 |     realized = parsed_obj.parse()
27 | 
28 |     return realized
29 | 
30 | 
31 | def get_class(robj: dict) -> str:
32 |     """Infer the R class name from a parsed RDS object.
33 | 
34 |     Notes:
35 |         - Handles both S4 and non-S4 R objects
36 |         - Special handling for vectors and matrices
37 |         - Checks for class information in object attributes
38 | 
39 |     Args:
40 |         robj:
41 |             Dictionary containing parsed RDS data, typically
42 |             the output of :py:func:`~.parse_rds`.
43 | 
44 |     Returns:
45 |         The inferred R class name, or None if no class can be determined.
46 |     """
47 |     _inferred_cls_name = None
48 |     if robj["type"] != "S4":
49 |         if "class_name" in robj:
50 |             _inferred_cls_name = robj["class_name"]
51 |             if _inferred_cls_name is not None and (
52 |                 "integer" in _inferred_cls_name or "double" in _inferred_cls_name or _inferred_cls_name == "vector"
53 |             ):
54 |                 if "attributes" in robj:
55 |                     obj_attr = robj["attributes"]
56 | 
57 |                     # kind of making this assumption, if we ever see a dim, its a matrix
58 |                     if obj_attr is not None:
59 |                         if "dim" in obj_attr:
60 |                             _inferred_cls_name = "ndarray"
61 |                         elif "class" in obj_attr:
62 |                             _inferred_cls_name = obj_attr["class"]["data"][0]
63 | 
64 |     else:
65 |         _inferred_cls_name = robj["class_name"]
66 | 
67 |     return _inferred_cls_name
68 | 


--------------------------------------------------------------------------------
/src/rds2py/read_atomic.py:
--------------------------------------------------------------------------------
  1 | """Functions for parsing atomic R vector types into Python objects.
  2 | 
  3 | This module provides parser functions for converting R's atomic vector types (boolean, integer, string, and double) into
  4 | appropriate Python objects using the biocutils package's specialized list classes.
  5 | """
  6 | 
  7 | from biocutils import BooleanList, FloatList, IntegerList, StringList
  8 | 
  9 | from .generics import _dispatcher
 10 | 
 11 | __author__ = "jkanche"
 12 | __copyright__ = "jkanche"
 13 | __license__ = "MIT"
 14 | 
 15 | 
 16 | def _extract_names(robject: dict, **kwargs):
 17 |     """Extract names attribute from an R object if present.
 18 | 
 19 |     Args:
 20 |         robject:
 21 |             Dictionary containing parsed R object data.
 22 | 
 23 |         **kwargs:
 24 |             Additional arguments.
 25 | 
 26 |     Returns:
 27 |         List of names if present in the object's attributes,
 28 |         None otherwise.
 29 |     """
 30 |     _names = None
 31 |     if "attributes" in robject and robject["attributes"] is not None:
 32 |         if "names" in robject["attributes"]:
 33 |             _names = _dispatcher(robject["attributes"]["names"])
 34 | 
 35 |     return _names
 36 | 
 37 | 
 38 | def read_boolean_vector(robject: dict, **kwargs) -> BooleanList:
 39 |     """Convert an R boolean vector to a Python :py:class:`~biocutils.BooleanList`.
 40 | 
 41 |     Args:
 42 |         robject:
 43 |             Dictionary containing parsed R boolean vector data.
 44 | 
 45 |         **kwargs:
 46 |             Additional arguments.
 47 | 
 48 |     Returns:
 49 |         A `BooleanList` object containing the vector data
 50 |         and any associated names.
 51 |     """
 52 |     _names = _extract_names(robject, **kwargs)
 53 | 
 54 |     obj = BooleanList(robject["data"], names=_names)
 55 |     return obj
 56 | 
 57 | 
 58 | def read_integer_vector(robject: dict, **kwargs) -> IntegerList:
 59 |     """Convert an R integer vector to a Python :py:class:`~biocutils.IntegerList`.
 60 | 
 61 |     Args:
 62 |         robject:
 63 |             Dictionary containing parsed R integer vector data.
 64 | 
 65 |         **kwargs:
 66 |             Additional arguments.
 67 | 
 68 |     Returns:
 69 |         A `IntegerList` object containing the vector data
 70 |         and any associated names.
 71 |     """
 72 |     _names = _extract_names(robject, **kwargs)
 73 | 
 74 |     obj = IntegerList(robject["data"], names=_names)
 75 |     return obj
 76 | 
 77 | 
 78 | def read_string_vector(robject: dict, **kwargs) -> StringList:
 79 |     """Convert an R string vector to a Python :py:class:`~biocutils.StringList`.
 80 | 
 81 |     Args:
 82 |         robject:
 83 |             Dictionary containing parsed R string vector data.
 84 | 
 85 |         **kwargs:
 86 |             Additional arguments.
 87 | 
 88 |     Returns:
 89 |         A `StringList` object containing the vector data
 90 |         and any associated names.
 91 |     """
 92 |     _names = _extract_names(robject, **kwargs)
 93 | 
 94 |     obj = StringList(robject["data"], names=_names)
 95 |     return obj
 96 | 
 97 | 
 98 | def read_double_vector(robject: dict, **kwargs) -> FloatList:
 99 |     """Convert an R double vector to a Python :py:class:`~biocutils.FloatList`.
100 | 
101 |     Args:
102 |         robject:
103 |             Dictionary containing parsed R double vector data.
104 | 
105 |         **kwargs:
106 |             Additional arguments.
107 | 
108 |     Returns:
109 |         A `FloatList` object containing the vector data
110 |         and any associated names.
111 |     """
112 |     _names = _extract_names(robject, **kwargs)
113 | 
114 |     obj = FloatList(robject["data"], names=_names)
115 |     return obj
116 | 


--------------------------------------------------------------------------------
/src/rds2py/read_delayed_matrix.py:
--------------------------------------------------------------------------------
 1 | """Functions and classes for parsing R delayed matrix objects from HDF5Array."""
 2 | 
 3 | from .generics import _dispatcher
 4 | from .rdsutils import get_class
 5 | 
 6 | __author__ = "jkanche"
 7 | __copyright__ = "jkanche"
 8 | __license__ = "MIT"
 9 | 
10 | 
def read_hdf5_sparse(robject: dict, **kwargs):
    """Build an HDF5-backed sparse matrix handle from a parsed R `H5SparseMatrix`.

    Args:
        robject:
            Parsed R object whose class must resolve to `H5SparseMatrix`.

        **kwargs:
            Forwarded to the dispatcher when decoding the seed's attributes.

    Returns:
        A `Hdf5CompressedSparseMatrix` (from the 'hdf5array' package) built
        from the seed's file path, group name and dimensions.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not `H5SparseMatrix`.
    """
    _cls = get_class(robject)
    if _cls != "H5SparseMatrix":
        raise RuntimeError(f"`robject` does not contain not a 'H5SparseMatrix' object, contains `{_cls}`.")

    seed = robject["attributes"]["seed"]

    # Only the CSC seed class is treated as column-oriented.
    is_csc = get_class(seed) == "CSC_H5SparseMatrixSeed"

    seed_attrs = seed["attributes"]
    dims = tuple(_dispatcher(seed_attrs["dim"], **kwargs))
    # File path and group are decoded as sequences; the first element is used.
    file_path = list(_dispatcher(seed_attrs["filepath"], **kwargs))[0]
    h5_group = list(_dispatcher(seed_attrs["group"], **kwargs))[0]

    from hdf5array import Hdf5CompressedSparseMatrix

    return Hdf5CompressedSparseMatrix(path=file_path, group_name=h5_group, shape=dims, by_column=is_csc)
42 | 


--------------------------------------------------------------------------------
/src/rds2py/read_dict.py:
--------------------------------------------------------------------------------
 1 | """Functions for parsing R vector and dictionary-like objects.
 2 | 
 3 | This module provides functionality to convert R named vectors and list objects into Python dictionaries or lists,
 4 | maintaining the structure and names of the original R objects.
 5 | """
 6 | 
 7 | from .generics import _dispatcher
 8 | from .rdsutils import get_class
 9 | 
10 | __author__ = "jkanche"
11 | __copyright__ = "jkanche"
12 | __license__ = "MIT"
13 | 
14 | 
def read_dict(robject: dict, **kwargs) -> dict:
    """Turn a parsed R list/vector into a Python dictionary or list.

    Args:
        robject:
            Parsed R object whose class must resolve to ``"vector"``.

        **kwargs:
            Forwarded to the dispatcher for every element.

    Returns:
        A dictionary keyed by the object's names when a ``names`` attribute is
        present; otherwise a list of the dispatched elements, in order.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not ``"vector"``.

    Example:
        >>> # For a named R vector c(a=1, b=2)
        >>> result = read_dict(robject)
        >>> print(result)
        {'a': 1, 'b': 2}
    """
    _cls = get_class(robject)
    if _cls != "vector":
        raise RuntimeError(f"`robject` does not contain not a vector/dictionary object, contains `{_cls}`.")

    attrs = robject["attributes"]

    # Unnamed input: keep positional order and return a plain list.
    if "names" not in attrs:
        return [_dispatcher(element, **kwargs) for element in robject["data"]]

    keys = list(_dispatcher(attrs["names"], **kwargs))

    # Elements are looked up by position; a repeated key keeps the last value.
    return {key: _dispatcher(robject["data"][pos], **kwargs) for pos, key in enumerate(keys)}
50 | 


--------------------------------------------------------------------------------
/src/rds2py/read_factor.py:
--------------------------------------------------------------------------------
 1 | """Functions for parsing R factor objects.
 2 | 
 3 | This module handles the conversion of R factors (categorical variables) into Python lists, preserving the levels and
 4 | maintaining the order of the factor levels.
 5 | """
 6 | 
 7 | from .generics import _dispatcher
 8 | from .rdsutils import get_class
 9 | 
10 | __author__ = "jkanche"
11 | __copyright__ = "jkanche"
12 | __license__ = "MIT"
13 | 
14 | 
def read_factor(robject: dict, **kwargs) -> list:
    """Convert an R factor to a Python list of level labels.

    Each entry of ``robject["data"]`` is treated as a 1-based code into the
    factor's levels. If a ``lengths`` attribute is present, the i-th label is
    repeated ``lengths[i]`` times; otherwise every label appears once.

    Args:
        robject:
            Dictionary containing parsed R factor data.

        **kwargs:
            Additional arguments, forwarded to the dispatcher.

    Returns:
        A list containing the factor values, with each value repeated
        according to its length if specified.

    Raises:
        RuntimeError:
            If ``robject`` is not a factor, or if it has no usable ``levels``
            attribute (the codes cannot be decoded without it).
    """
    _cls = get_class(robject)

    if _cls not in ["factor"]:
        raise RuntimeError(f"`robject` does not contain not a factor object, contains `{_cls}`.")

    data = robject["data"]
    attrs = robject["attributes"]

    levels = None
    if "levels" in attrs:
        levels = _dispatcher(attrs["levels"], **kwargs)

    # Without levels the codes would be subscripted against `None`; fail with
    # an explicit message instead of an opaque TypeError.
    if levels is None:
        raise RuntimeError("`robject` is a factor without 'levels'; cannot decode its codes.")

    # R codes are 1-based, Python indexing is 0-based.
    level_vec = [levels[x - 1] for x in data]

    if "lengths" in attrs:
        lengths = _dispatcher(attrs["lengths"], **kwargs)
    else:
        lengths = [1] * len(data)

    final_vec = []
    for i, x in enumerate(lengths):
        final_vec.extend([level_vec[i]] * x)

    return final_vec
51 | 


--------------------------------------------------------------------------------
/src/rds2py/read_frame.py:
--------------------------------------------------------------------------------
 1 | """Functions for parsing R data frame objects.
 2 | 
 3 | This module provides parsers for converting both base R `data.frame` objects
 4 | and Bioconductor `DataFrame` objects into Python `BiocFrame` objects, preserving
 5 | row names, column names, and data types.
 6 | """
 7 | 
 8 | from .generics import _dispatcher
 9 | from .rdsutils import get_class
10 | 
11 | __author__ = "jkanche"
12 | __copyright__ = "jkanche"
13 | __license__ = "MIT"
14 | 
15 | 
def read_data_frame(robject: dict, **kwargs):
    """Build a :py:class:`~biocframe.BiocFrame` from a parsed base-R `data.frame`.

    Args:
        robject:
            Parsed R object whose class must resolve to ``"data.frame"``.

        **kwargs:
            Forwarded to the dispatcher for names, columns and row names.

    Returns:
        A BiocFrame whose columns are the dispatched entries of
        ``robject["data"]``, keyed by the ``names`` attribute, with row names
        taken from the ``row.names`` attribute.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not ``"data.frame"``.
    """
    if get_class(robject) != "data.frame":
        raise RuntimeError("`robject` does not contain a 'data.frame'.")

    attrs = robject["attributes"]
    headers = _dispatcher(attrs["names"], **kwargs)

    # Columns are matched to their header by position.
    columns = {headers[pos]: _dispatcher(column, **kwargs) for pos, column in enumerate(robject["data"])}

    from biocframe import BiocFrame

    row_labels = _dispatcher(attrs["row.names"], **kwargs)
    return BiocFrame(columns, row_names=row_labels)
49 | 
50 | 
def read_dframe(robject: dict, **kwargs):
    """Convert an R DFrame (Bioconductor's `DataFrame`) to a `BiocFrame` object.

    Columns come from the ``listData`` attribute (names matched to data by
    position). Row names and the row count are taken from the ``rownames``
    and ``nrows`` attributes when their ``"data"`` entry is truthy.

    Args:
        robject:
            Dictionary containing parsed R `DFrame` object.

        **kwargs:
            Additional arguments, forwarded to every dispatcher call.

    Returns:
        A BiocFrame object containing the DataFrame's contents,
        with preserved metadata and structure.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not ``"DFrame"``.
    """
    from biocframe import BiocFrame

    cls = get_class(robject)

    if cls != "DFrame":
        raise RuntimeError("`robject` does not contain a 'DFrame'.")

    attrs = robject["attributes"]
    list_data = attrs["listData"]

    data = {}
    col_names = _dispatcher(list_data["attributes"]["names"], **kwargs)
    for idx, colname in enumerate(col_names):
        data[colname] = _dispatcher(list_data["data"][idx], **kwargs)

    index = None
    if attrs["rownames"]["data"]:
        index = _dispatcher(attrs["rownames"], **kwargs)

    nrows = None
    if attrs["nrows"]["data"]:
        # kwargs belong to the dispatcher call; previously they were passed
        # to `list()`, which raised TypeError for any non-empty kwargs.
        nrows = list(_dispatcher(attrs["nrows"], **kwargs))[0]

    df = BiocFrame(
        data,
        # column_names=col_names,
        row_names=index,
        number_of_rows=nrows,
    )

    return df
93 | 


--------------------------------------------------------------------------------
/src/rds2py/read_granges.py:
--------------------------------------------------------------------------------
  1 | """Functions for parsing Bioconductor GenomicRanges objects.
  2 | 
  3 | This module provides parsers for converting Bioconductor's GenomicRanges and GenomicRangesList objects into their Python
  4 | equivalents, preserving all genomic coordinates and associated metadata.
  5 | """
  6 | 
  7 | from .generics import _dispatcher
  8 | from .rdsutils import get_class
  9 | 
 10 | __author__ = "jkanche"
 11 | __copyright__ = "jkanche"
 12 | __license__ = "MIT"
 13 | 
 14 | 
 15 | def read_genomic_ranges(robject: dict, **kwargs):
 16 |     """Convert an R `GenomicRanges` object to a Python :py:class:`~genomicranges.GenomicRanges` object.
 17 | 
 18 |     Args:
 19 |         robject:
 20 |             Dictionary containing parsed `GenomicRanges` data.
 21 | 
 22 |         **kwargs:
 23 |             Additional arguments.
 24 | 
 25 |     Returns:
 26 |         A Python `GenomicRanges` object containing genomic intervals
 27 |         with associated annotations.
 28 |     """
 29 | 
 30 |     from genomicranges import GenomicRanges, SeqInfo
 31 |     from iranges import IRanges
 32 | 
 33 |     _cls = get_class(robject)
 34 | 
 35 |     if _cls not in ["GenomicRanges", "GRanges"]:
 36 |         raise TypeError(f"obj is not 'GenomicRanges', but is `{_cls}`.")
 37 | 
 38 |     _range_start = _dispatcher(robject["attributes"]["ranges"]["attributes"]["start"], **kwargs)
 39 |     _range_width = _dispatcher(robject["attributes"]["ranges"]["attributes"]["width"], **kwargs)
 40 |     _range_names = None
 41 |     if "NAMES" in robject["attributes"]["ranges"]["attributes"]:
 42 |         _tmp_names = robject["attributes"]["ranges"]["attributes"]["NAMES"]
 43 |         _range_names = _dispatcher(_tmp_names, **kwargs)
 44 |         if _range_names is not None:
 45 |             _range_names = list(_range_names)
 46 | 
 47 |     _ranges = IRanges(_range_start, _range_width, names=_range_names)
 48 | 
 49 |     _strands = _dispatcher(robject["attributes"]["strand"], **kwargs)
 50 |     _seqnames = _dispatcher(robject["attributes"]["seqnames"], **kwargs)
 51 |     _seqinfo_seqnames = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["seqnames"], **kwargs)
 52 |     _seqinfo_seqlengths = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["seqlengths"], **kwargs)
 53 |     _seqinfo_is_circular = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["is_circular"], **kwargs)
 54 |     _seqinfo_genome = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["genome"], **kwargs)
 55 |     _seqinfo = SeqInfo(
 56 |         seqnames=_seqinfo_seqnames,
 57 |         seqlengths=_seqinfo_seqlengths,
 58 |         is_circular=_seqinfo_is_circular,
 59 |         genome=_seqinfo_genome,
 60 |     )
 61 |     _mcols = _dispatcher(robject["attributes"]["elementMetadata"], **kwargs)
 62 | 
 63 |     _gr_names = None
 64 |     if "NAMES" in robject["attributes"]:
 65 |         _tmp_names = robject["attributes"]["NAMES"]
 66 |         _gr_names = None if _tmp_names is None else _dispatcher(_tmp_names, **kwargs)
 67 | 
 68 |     return GenomicRanges(
 69 |         seqnames=_seqnames,
 70 |         ranges=_ranges,
 71 |         strand=_strands,
 72 |         names=_gr_names,
 73 |         mcols=_mcols,
 74 |         seqinfo=_seqinfo,
 75 |     )
 76 | 
 77 | 
 78 | def read_granges_list(robject: dict, **kwargs):
 79 |     """Convert an R `GenomicRangesList` object to a Python :py:class:`~genomicranges.GenomicRangesList`.
 80 | 
 81 |     Args:
 82 |         robject:
 83 |             Dictionary containing parsed GenomicRangesList data.
 84 | 
 85 |         **kwargs:
 86 |             Additional arguments.
 87 | 
 88 |     Returns:
 89 |         A Python `GenomicRangesList` object containing containing multiple
 90 |         `GenomicRanges` objects.
 91 |     """
 92 | 
 93 |     from genomicranges import GenomicRangesList
 94 | 
 95 |     _cls = get_class(robject)
 96 | 
 97 |     if _cls not in ["CompressedGRangesList", "GRangesList"]:
 98 |         raise TypeError(f"`robject` is not genomic ranges list, but is `{_cls}`.")
 99 | 
100 |     _gre = _dispatcher(robject["attributes"]["unlistData"], **kwargs)
101 | 
102 |     _groups = None
103 |     if "NAMES" in robject["attributes"]["partitioning"]["attributes"]:
104 |         _tmp_names = robject["attributes"]["partitioning"]["attributes"]["NAMES"]
105 |         _groups = None if _tmp_names is None else _dispatcher(_tmp_names, **kwargs)
106 | 
107 |     _partitionends = _dispatcher(robject["attributes"]["partitioning"]["attributes"]["end"], **kwargs)
108 | 
109 |     _grelist = []
110 | 
111 |     current = 0
112 |     for _pend in _partitionends:
113 |         _grelist.append(_gre[current:_pend])
114 |         current = _pend
115 | 
116 |     return GenomicRangesList(ranges=_grelist, names=_groups)
117 | 


--------------------------------------------------------------------------------
/src/rds2py/read_mae.py:
--------------------------------------------------------------------------------
 1 | """Functions for parsing Bioconductor MultiAssayExperiment objects.
 2 | 
 3 | This module handles the conversion of Bioconductor's MultiAssayExperiment container format into its Python equivalent,
 4 | preserving the complex relationships between multiple experimental assays and sample metadata.
 5 | """
 6 | 
 7 | from .generics import _dispatcher
 8 | from .rdsutils import get_class
 9 | from .read_matrix import MatrixWrapper
10 | 
11 | __author__ = "jkanche"
12 | __copyright__ = "jkanche"
13 | __license__ = "MIT"
14 | 
15 | 
def _sanitize_expts(expts, **kwargs):
    """Wrap bare matrices in `SummarizedExperiment` containers.

    Args:
        expts:
            Dictionary of experiment objects.

        **kwargs:
            Accepted but not used.

    Returns:
        A new dictionary with the same keys. `MatrixWrapper` values become a
        `SummarizedExperiment` with a single ``"matrix"`` assay and row/column
        data named from the wrapper's dimnames; all other values are passed
        through unchanged.
    """
    from biocframe import BiocFrame
    from summarizedexperiment import SummarizedExperiment

    def _wrap(value):
        # Anything that is not a wrapped matrix is kept as-is.
        if not isinstance(value, MatrixWrapper):
            return value

        return SummarizedExperiment(
            assays={"matrix": value.matrix},
            row_data=BiocFrame(row_names=value.dimnames[0]),
            column_data=BiocFrame(row_names=value.dimnames[1]),
        )

    return {name: _wrap(expt) for name, expt in expts.items()}
42 | 
43 | 
def read_multi_assay_experiment(robject: dict, **kwargs):
    """Build a Python :py:class:`~multiassayexperiment.MultiAssayExperiment` from its parsed R counterpart.

    Args:
        robject:
            Parsed R object whose class must resolve to `MultiAssayExperiment`.

        **kwargs:
            Forwarded to every dispatcher call.

    Returns:
        A `MultiAssayExperiment` built from the dispatched experiment list
        (passed through `_sanitize_expts`), sample map and column data.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not `MultiAssayExperiment`.
    """

    _cls = get_class(robject)
    if _cls != "MultiAssayExperiment":
        raise RuntimeError(f"`robject` does not contain a 'MultiAssayExperiment' object, contains `{_cls}`.")

    attrs = robject["attributes"]

    # Experiments live in the `listData` of the ExperimentList.
    experiments = _dispatcher(attrs["ExperimentList"]["attributes"]["listData"], **kwargs)
    sample_map = _dispatcher(attrs["sampleMap"], **kwargs)
    col_data = _dispatcher(attrs["colData"], **kwargs)

    from multiassayexperiment import MultiAssayExperiment

    return MultiAssayExperiment(
        experiments=_sanitize_expts(experiments),
        sample_map=sample_map,
        column_data=col_data,
    )
81 | 


--------------------------------------------------------------------------------
/src/rds2py/read_matrix.py:
--------------------------------------------------------------------------------
  1 | """Functions and classes for parsing R matrix objects.
  2 | 
  3 | This module provides functionality to convert R matrix objects (both dense and sparse) into their Python equivalents
  4 | using NumPy and SciPy sparse matrix formats. It handles various R matrix types including dgCMatrix, dgRMatrix, and
  5 | dgTMatrix.
  6 | """
  7 | 
  8 | from typing import Literal
  9 | 
 10 | from numpy import ndarray
 11 | 
 12 | from .generics import _dispatcher
 13 | from .rdsutils import get_class
 14 | 
 15 | __author__ = "jkanche"
 16 | __copyright__ = "jkanche"
 17 | __license__ = "MIT"
 18 | 
 19 | 
 20 | class MatrixWrapper:
 21 |     """A simple wrapper class for matrices that preserves dimension names.
 22 | 
 23 |     This class bundles a matrix (dense or sparse) with its dimension names,
 24 |     maintaining the R-style naming of rows and columns.
 25 | 
 26 |     Attributes:
 27 |         matrix:
 28 |             The underlying matrix object (numpy.ndarray or scipy.sparse matrix).
 29 | 
 30 |         dimnames:
 31 |             A tuple of (row_names, column_names), each being a list of strings or None.
 32 |     """
 33 | 
 34 |     def __init__(self, matrix, dimnames=None) -> None:
 35 |         self.matrix = matrix
 36 |         self.dimnames = dimnames
 37 | 
 38 |     @property
 39 |     def shape(self):
 40 |         return self.matrix.shape
 41 | 
 42 | 
 43 | def _as_sparse_matrix(robject: dict, **kwargs):
 44 |     """Convert an R sparse matrix to a SciPy sparse matrix.
 45 | 
 46 |     Notes:
 47 |         - Supports dgCMatrix (column-sparse)
 48 |         - Supports dgRMatrix (row-sparse)
 49 |         - Supports dgTMatrix (triplet format)
 50 |         - Preserves dimension names if present
 51 | 
 52 |     Args:
 53 |         robject:
 54 |             Dictionary containing parsed R sparse matrix data.
 55 | 
 56 |         **kwargs:
 57 |             Additional arguments.
 58 | 
 59 |     Returns:
 60 |         A SciPy sparse matrix or wrapped matrix if dimension names exist.
 61 |     """
 62 | 
 63 |     from scipy.sparse import csc_matrix, csr_matrix
 64 | 
 65 |     _cls = get_class(robject)
 66 | 
 67 |     if _cls not in ["dgCMatrix", "dgRMatrix", "dgTMatrix"]:
 68 |         raise RuntimeError(f"`robject` does not contain not a supported sparse matrix format, contains `{_cls}`.")
 69 | 
 70 |     if _cls == "dgCMatrix":
 71 |         mat = csc_matrix(
 72 |             (
 73 |                 robject["attributes"]["x"]["data"],
 74 |                 robject["attributes"]["i"]["data"],
 75 |                 robject["attributes"]["p"]["data"],
 76 |             ),
 77 |             shape=tuple(robject["attributes"]["Dim"]["data"].tolist()),
 78 |         )
 79 |     elif _cls == "dgRMatrix":
 80 |         mat = csr_matrix(
 81 |             (
 82 |                 robject["attributes"]["x"]["data"],
 83 |                 robject["attributes"]["i"]["data"],
 84 |                 robject["attributes"]["p"]["data"],
 85 |             ),
 86 |             shape=tuple(robject["attributes"]["Dim"]["data"].tolist()),
 87 |         )
 88 |     elif _cls == "dgTMatrix":
 89 |         mat = csr_matrix(
 90 |             (
 91 |                 robject["attributes"]["x"]["data"],
 92 |                 (
 93 |                     robject["attributes"]["i"]["data"],
 94 |                     robject["attributes"]["j"]["data"],
 95 |                 ),
 96 |             ),
 97 |             shape=tuple(robject["attributes"]["Dim"]["data"].tolist()),
 98 |         )
 99 | 
100 |     names = None
101 |     if "Dimnames" in robject["attributes"]:
102 |         names = _dispatcher(robject["attributes"]["Dimnames"], **kwargs)
103 |         if names is not None and len(names) > 0:
104 |             # Use the wrapper class onyly if names are available
105 |             # for atleast one dimension
106 |             if not all(x is None for x in names):
107 |                 return MatrixWrapper(mat, names)
108 | 
109 |     return mat
110 | 
111 | 
def _as_dense_matrix(robject, order: Literal["C", "F"] = "F", **kwargs) -> ndarray:
    """Build a `NumPy` array over the data buffer of a parsed R matrix.

    Args:
        robject:
            Parsed R object whose class must resolve to ``"ndarray"``.

        order:
            Memory layout used to interpret the buffer:
            'C' for row-major, 'F' for column-major (default).

        **kwargs:
            Forwarded to the dispatcher when decoding dimension names.

    Returns:
        A NumPy array shaped by the ``dim`` attribute, or a `MatrixWrapper`
        around it when a non-empty ``dimnames`` attribute is decoded.

    Raises:
        ValueError:
            If ``order`` is neither 'C' nor 'F'.
        TypeError:
            If the class of ``robject`` is not ``"ndarray"``.
    """
    _cls = get_class(robject)

    if order != "C" and order != "F":
        raise ValueError("order must be either 'C' or 'F'.")

    if _cls != "ndarray":
        raise TypeError(f"obj is not a supported dense matrix format, but is `{_cls}`.")

    attrs = robject["attributes"]
    dims = tuple(attrs["dim"]["data"].tolist())
    element_type = robject["data"].dtype

    # The array is created directly over the parsed data buffer.
    dense = ndarray(shape=dims, dtype=element_type, buffer=robject["data"], order=order)

    if "dimnames" not in attrs:
        return dense

    dim_labels = _dispatcher(attrs["dimnames"], **kwargs)
    if dim_labels is not None and len(dim_labels) > 0:
        return MatrixWrapper(dense, dim_labels)

    return dense
151 | 
152 | 
def read_dgcmatrix(robject: dict, **kwargs):
    """Read a parsed R dgCMatrix (sparse column matrix).

    Delegates to `_as_sparse_matrix`.

    Args:
        robject:
            Dictionary containing parsed dgCMatrix data.

        **kwargs:
            Forwarded to `_as_sparse_matrix`.

    Returns:
        Whatever `_as_sparse_matrix` produces for this object.
    """
    converted = _as_sparse_matrix(robject, **kwargs)
    return converted
167 | 
168 | 
def read_dgrmatrix(robject: dict, **kwargs):
    """Read a parsed R dgRMatrix (sparse row matrix).

    Delegates to `_as_sparse_matrix`.

    Args:
        robject:
            Dictionary containing parsed dgRMatrix data.

        **kwargs:
            Forwarded to `_as_sparse_matrix`.

    Returns:
        Whatever `_as_sparse_matrix` produces for this object.
    """
    converted = _as_sparse_matrix(robject, **kwargs)
    return converted
183 | 
184 | 
def read_dgtmatrix(robject: dict, **kwargs):
    """Read a parsed R dgTMatrix (sparse triplet matrix).

    Delegates to `_as_sparse_matrix`.

    Args:
        robject:
            Dictionary containing parsed dgTMatrix data.

        **kwargs:
            Forwarded to `_as_sparse_matrix`.

    Returns:
        Whatever `_as_sparse_matrix` produces for this object.
    """
    converted = _as_sparse_matrix(robject, **kwargs)
    return converted
199 | 
200 | 
def read_ndarray(robject: dict, order: Literal["C", "F"] = "F", **kwargs) -> ndarray:
    """Read a parsed R matrix as a NumPy array.

    Delegates to `_as_dense_matrix`.

    Args:
        robject:
            Dictionary containing parsed R matrix data.

        order:
            Memory layout for the array, passed through unchanged.

        **kwargs:
            Forwarded to `_as_dense_matrix`.

    Returns:
        Whatever `_as_dense_matrix` produces for this object.
    """
    dense = _as_dense_matrix(robject, order=order, **kwargs)
    return dense
218 | 


--------------------------------------------------------------------------------
/src/rds2py/read_rle.py:
--------------------------------------------------------------------------------
 1 | """Functions for parsing R's Rle (Run-length encoding) objects.
 2 | 
 3 | This module provides functionality to convert R's Rle (Run-length encoding) objects into Python lists, expanding the
 4 | compressed representation into its full form.
 5 | """
 6 | 
 7 | from .generics import _dispatcher
 8 | from .rdsutils import get_class
 9 | 
10 | __author__ = "jkanche"
11 | __copyright__ = "jkanche"
12 | __license__ = "MIT"
13 | 
14 | 
def read_rle(robject: dict, **kwargs) -> list:
    """Expand a parsed R Rle (run-length encoded) object into a Python list.

    Args:
        robject:
            Parsed R object whose class must resolve to ``"Rle"``.

        **kwargs:
            Forwarded to the dispatcher for the values and lengths.

    Returns:
        A list in which the i-th value is repeated by the i-th run length.
        When no ``lengths`` attribute exists, every value appears once.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not ``"Rle"``.

    Example:
        >>> # For Rle with values=[1,2] and lengths=[3,2]
        >>> result = read_rle(robject)
        >>> print(result)
        [1, 1, 1, 2, 2]
    """
    _cls = get_class(robject)
    if _cls != "Rle":
        raise RuntimeError(f"`robject` does not contain a 'Rle' object, contains `{_cls}`.")

    attrs = robject["attributes"]
    run_values = list(_dispatcher(attrs["values"], **kwargs))

    # Missing lengths are treated as runs of one.
    run_lengths = _dispatcher(attrs["lengths"], **kwargs) if "lengths" in attrs else [1] * len(run_values)

    expanded = []
    for pos, count in enumerate(run_lengths):
        expanded.extend([run_values[pos]] * count)

    return expanded
51 | 


--------------------------------------------------------------------------------
/src/rds2py/read_sce.py:
--------------------------------------------------------------------------------
 1 | """Functions for parsing Bioconductor `SingleCellExperiment` objects.
 2 | 
 3 | This module provides parsers for converting Bioconductor's `SingleCellExperiment`
 4 | objects into their Python equivalents, handling the complex structure of single-cell
 5 | data including multiple assays, reduced dimensions, and alternative experiments.
 6 | """
 7 | 
 8 | from .generics import _dispatcher
 9 | from .rdsutils import get_class
10 | 
11 | __author__ = "jkanche"
12 | __copyright__ = "jkanche"
13 | __license__ = "MIT"
14 | 
15 | 
def read_alts_summarized_experiment_by_column(robject: dict, **kwargs):
    """Parse alternative experiments in a SingleCellExperiment.

    Args:
        robject:
            Parsed R object whose class must resolve to
            `SummarizedExperimentByColumn`.

        **kwargs:
            Forwarded to the dispatcher for every attribute.

    Returns:
        A dictionary mapping each attribute name of ``robject`` to its
        dispatched value.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not `SummarizedExperimentByColumn`.
    """
    _cls = get_class(robject)
    if _cls != "SummarizedExperimentByColumn":
        raise RuntimeError(f"`robject` does not contain a 'SummarizedExperimentByColumn' object, contains `{_cls}`.")

    # Every attribute is dispatched independently, in stored order.
    return {attr_name: _dispatcher(attr_value, **kwargs) for attr_name, attr_value in robject["attributes"].items()}
29 | 
30 | 
def read_single_cell_experiment(robject: dict, **kwargs):
    """Convert an R SingleCellExperiment to Python SingleCellExperiment.

    The object is first dispatched as a `RangedSummarizedExperiment` to obtain
    assays, row/column data and row ranges. Reduced dimensions and alternative
    experiments are then read from the ``int_colData`` columns named
    ``reducedDims`` and ``altExps``.

    Args:
        robject:
            Dictionary containing parsed SingleCellExperiment data.
            It is not modified.

        **kwargs:
            Additional arguments, forwarded to every dispatcher call.

    Returns:
        A Python SingleCellExperiment object containing
        the assay data and associated metadata.

    Raises:
        RuntimeError:
            If the class of ``robject`` is not `SingleCellExperiment`.
    """

    _cls = get_class(robject)

    if _cls not in ["SingleCellExperiment"]:
        raise RuntimeError(f"`robject` does not contain a 'SingleCellExperiment' object, contains `{_cls}`.")

    # Dispatch a relabelled shallow copy rather than overwriting the caller's
    # `class_name`; mutating the input made a second call on the same parsed
    # object fail the class check above.
    _rse_view = dict(robject)
    _rse_view["class_name"] = "RangedSummarizedExperiment"
    _rse = _dispatcher(_rse_view, **kwargs)

    # check red. dims, alternative expts
    robj_reduced_dims = None
    robj_alt_exps = None

    _int_coldata = robject["attributes"]["int_colData"]["attributes"]["listData"]
    col_attrs = list(_dispatcher(_int_coldata["attributes"]["names"], **kwargs))

    for idx, idx_col in enumerate(col_attrs):
        idx_value = _int_coldata["data"][idx]

        if idx_col == "reducedDims" and idx_value.get("data", None) is not None:
            robj_reduced_dims = _dispatcher(idx_value, **kwargs)

        if idx_col == "altExps":
            _alt_list = idx_value["attributes"]["listData"]
            alt_names = list(_dispatcher(_alt_list["attributes"]["names"], **kwargs))
            robj_alt_exps = {}
            # Separate index name so the outer loop variable is not shadowed.
            for alt_idx, altn in enumerate(alt_names):
                robj_alt_exps[altn] = _dispatcher(_alt_list["data"][alt_idx], **kwargs)["se"]

        # ignore colpairs for now, does anyone even use this ?
        # if col == "colPairs":

    from singlecellexperiment import SingleCellExperiment

    return SingleCellExperiment(
        assays=_rse.assays,
        row_data=_rse.row_data,
        column_data=_rse.column_data,
        row_ranges=_rse.row_ranges,
        alternative_experiments=robj_alt_exps,
        reduced_dims=robj_reduced_dims,
    )
87 | 


--------------------------------------------------------------------------------
/src/rds2py/read_se.py:
--------------------------------------------------------------------------------
  1 | """Functions for parsing Bioconductor `SummarizedExperiment` objects.
  2 | 
  3 | This module provides parsers for converting Bioconductor's `SummarizedExperiment`
  4 | objects into their Python equivalents.
  5 | """
  6 | 
  7 | from .generics import _dispatcher
  8 | from .rdsutils import get_class
  9 | from .read_matrix import MatrixWrapper
 10 | 
 11 | __author__ = "jkanche"
 12 | __copyright__ = "jkanche"
 13 | __license__ = "MIT"
 14 | 
 15 | 
 16 | def _sanitize_empty_frame(frame, nrows):
 17 |     if frame.shape == (0, 0):
 18 |         from biocframe import BiocFrame
 19 | 
 20 |         return BiocFrame(number_of_rows=nrows)
 21 | 
 22 |     return frame
 23 | 
 24 | 
 25 | def _sanitize_assays(assays):
 26 |     res = {}
 27 |     for k, v in assays.items():
 28 |         if isinstance(v, MatrixWrapper):
 29 |             res[k] = v.matrix
 30 |         else:
 31 |             res[k] = v
 32 | 
 33 |     return res
 34 | 
 35 | 
 36 | def read_summarized_experiment(robject: dict, **kwargs):
 37 |     """Convert an R SummarizedExperiment to Python
 38 |     :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 39 | 
 40 |     Args:
 41 |         robject:
 42 |             Dictionary containing parsed SummarizedExperiment data.
 43 | 
 44 |         **kwargs:
 45 |             Additional arguments.
 46 | 
 47 |     Returns:
 48 |         A `SummarizedExperiment` from the R object.
 49 |     """
 50 | 
 51 |     _cls = get_class(robject)
 52 | 
 53 |     if _cls not in ["SummarizedExperiment"]:
 54 |         raise RuntimeError(f"`robject` does not contain a 'SummarizedExperiment' object, contains `{_cls}`.")
 55 |     # parse assays  names
 56 |     robj_asys = {}
 57 |     assay_dims = None
 58 |     asy_names = list(
 59 |         _dispatcher(
 60 |             robject["attributes"]["assays"]["attributes"]["data"]["attributes"]["listData"]["attributes"]["names"],
 61 |             **kwargs,
 62 |         )
 63 |     )
 64 |     for idx, asyname in enumerate(asy_names):
 65 |         idx_asy = robject["attributes"]["assays"]["attributes"]["data"]["attributes"]["listData"]["data"][idx]
 66 | 
 67 |         robj_asys[asyname] = _dispatcher(idx_asy, **kwargs)
 68 |         if assay_dims is None:
 69 |             assay_dims = robj_asys[asyname].shape
 70 | 
 71 |     # parse coldata
 72 |     robj_coldata = _sanitize_empty_frame(_dispatcher(robject["attributes"]["colData"], **kwargs), assay_dims[1])
 73 | 
 74 |     # parse rowdata
 75 |     robj_rowdata = _sanitize_empty_frame(_dispatcher(robject["attributes"]["elementMetadata"], **kwargs), assay_dims[0])
 76 | 
 77 |     from summarizedexperiment import SummarizedExperiment
 78 | 
 79 |     return SummarizedExperiment(
 80 |         assays=_sanitize_assays(robj_asys),
 81 |         row_data=robj_rowdata,
 82 |         column_data=robj_coldata,
 83 |     )
 84 | 
 85 | 
 86 | def read_ranged_summarized_experiment(robject: dict, **kwargs):
 87 |     """Convert an R RangedSummarizedExperiment to its Python equivalent.
 88 | 
 89 |     Args:
 90 |         robject:
 91 |             Dictionary containing parsed SummarizedExperiment data.
 92 | 
 93 |         **kwargs:
 94 |             Additional arguments.
 95 | 
 96 |     Returns:
 97 |         A Python RangedSummarizedExperiment object.
 98 |     """
 99 | 
100 |     _cls = get_class(robject)
101 | 
102 |     if _cls not in ["RangedSummarizedExperiment"]:
103 |         raise RuntimeError(f"`robject` does not contain a 'RangedSummarizedExperiment' object, contains `{_cls}`.")
104 | 
105 |     robject["class_name"] = "SummarizedExperiment"
106 |     _se = _dispatcher(robject, **kwargs)
107 | 
108 |     # parse rowRanges
109 |     row_ranges_data = None
110 |     if "rowRanges" in robject["attributes"]:
111 |         row_ranges_data = _dispatcher(robject["attributes"]["rowRanges"], **kwargs)
112 | 
113 |     from summarizedexperiment import RangedSummarizedExperiment
114 | 
115 |     return RangedSummarizedExperiment(
116 |         assays=_se.assays,
117 |         row_data=_se.row_data,
118 |         column_data=_se.column_data,
119 |         row_ranges=row_ranges_data,
120 |     )
121 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | """Dummy conftest.py for rds2py.
 2 | 
 3 | If you don't know what this is for, just leave it empty.
 4 | Read more about conftest.py under:
 5 | - https://docs.pytest.org/en/stable/fixture.html
 6 | - https://docs.pytest.org/en/stable/writing_plugins.html
 7 | """
 8 | 
 9 | # import pytest
10 | 


--------------------------------------------------------------------------------
/tests/data/atomic_attr.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_attr.rds


--------------------------------------------------------------------------------
/tests/data/atomic_chars.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_chars.rds


--------------------------------------------------------------------------------
/tests/data/atomic_chars_unicode.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_chars_unicode.rds


--------------------------------------------------------------------------------
/tests/data/atomic_complex.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_complex.rds


--------------------------------------------------------------------------------
/tests/data/atomic_double.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_double.rds


--------------------------------------------------------------------------------
/tests/data/atomic_ints.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_ints.rds


--------------------------------------------------------------------------------
/tests/data/atomic_ints_with_names.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_ints_with_names.rds


--------------------------------------------------------------------------------
/tests/data/atomic_logical.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_logical.rds


--------------------------------------------------------------------------------
/tests/data/atomic_logical_wNA.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_logical_wNA.rds


--------------------------------------------------------------------------------
/tests/data/atomic_raw.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_raw.rds


--------------------------------------------------------------------------------
/tests/data/data.frame.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/data.frame.rds


--------------------------------------------------------------------------------
/tests/data/example_anndata.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/example_anndata.h5ad


--------------------------------------------------------------------------------
/tests/data/generate_files.R:
--------------------------------------------------------------------------------
  1 | # # pairlist
  2 | 
  3 | # y <- pairlist(runif(10), runif(20), runif(30))
  4 | # saveRDS(y, file="pairlist.rds")
  5 | 
  6 | # y <- pairlist(sample(letters), pairlist(sample(11), runif(12)))
  7 | # saveRDS(y, file="pairlist_nested.rds")
  8 | 
  9 | # y <- pairlist(foo=sample(letters), bar=pairlist(whee=sample(11), bum=runif(12))) # with names
 10 | # saveRDS(y, file="pairlist_names.rds")
 11 | 
 12 | # y <- pairlist(aaron=sample(letters), bar=list(sample(11), runif(12)))
 13 | # attr(y, "foo") <- "bar"
 14 | # saveRDS(y, file="pairlist_attr.rds")
 15 | 
 16 | 
 17 | # altrep
 18 | 
 19 | # scenarios <- 1:15
 20 | # saveRDS(y, file="altrep_series.rds")
 21 | 
 22 | # x <- 1:100
 23 | # names(x) <- sprintf("GENE_%s", seq_along(x))
 24 | # saveRDS(x, file="altrep_attr.rds")
 25 | 
 26 | # x <- as.character(1:100)
 27 | # saveRDS(x, file="altrep_strings_deferred.rds")
 28 | 
 29 | # x <- c(NA_integer_, 1:10, NA_integer_)
 30 | # x <- as.character(x)
 31 | # saveRDS(x, file="altrep_strings_wNA.rds")
 32 | 
 33 | # x <- as.character(1:100 * 2)
 34 | # saveRDS(x, file="altrep_double_deferred.rds")
 35 | 
 36 | # x <- c(NaN, 1:10, Inf, -Inf, NA)
 37 | # x <- as.character(x)
 38 | # saveRDS(x, file="altrep_double_wNA.rds")
 39 | 
 40 | # Atomic vectors: one .rds file per basic R vector type.
 41 | 
 42 | y <- rpois(112, lambda=8)  # 112 Poisson-distributed counts
 43 | saveRDS(y, file="atomic_ints.rds")
 44 | 
 45 | y <- rbinom(55, 1, 0.5) == 0  # 55 logical values, none missing
 46 | saveRDS(y, file="atomic_logical.rds")
 47 | 
 48 | y <- rbinom(999, 1, 0.5) == 0  # 999 logical values ...
 49 | y[sample(length(y), 10)] <- NA  # ... 10 of which are overwritten with NA at random positions
 50 | saveRDS(y, file="atomic_logical_wNA.rds")
 51 | 
 52 | y <- rnorm(99)  # 99 standard-normal doubles
 53 | saveRDS(y, file="atomic_double.rds")
 54 | 
 55 | y <- as.raw(sample(256, 99, replace=TRUE) - 1)  # 99 random bytes in 0..255
 56 | saveRDS(y, file="atomic_raw.rds")
 57 | 
 58 | y <- rnorm(99) + rnorm(99) * 1i  # 99 complex numbers with random real and imaginary parts
 59 | saveRDS(y, file="atomic_complex.rds")
 60 | 
 61 | y <- sample(LETTERS)  # the 26 upper-case letters in random order
 62 | saveRDS(y, file="atomic_chars.rds")
 63 | 
 64 | y <- c("α-globin", "😀😀😀", "fußball", "Hervé Pagès")  # four non-ASCII strings
 65 | saveRDS(y, file="atomic_chars_unicode.rds")
 66 | 
 67 | vals <- sample(.Machine$integer.max, 1000)  # 1000 distinct positive integers
 68 | names(vals) <- sprintf("GENE_%i", seq_along(vals))  # element names GENE_1 .. GENE_1000
 69 | attr(vals, "foo") <- c("BAR", "bar", "Bar")  # an extra, arbitrary attribute
 70 | class(vals) <- "frog"  # a made-up class name
 71 | saveRDS(vals, file="atomic_attr.rds")
 72 | 
 73 | # Scalars (a length-one vector)
 74 | 
 75 | y <- 10  # NOTE: a bare numeric literal is a double in R, so despite the file name this is not an integer
 76 | saveRDS(y, file="scalar_int.rds")
 77 | 
 78 | # Lists and data frames
 79 | 
 80 | y <- list(runif(10), runif(20), runif(30))  # unnamed list of three numeric vectors
 81 | saveRDS(y, file="lists.rds")
 82 | 
 83 | y <- list(sample(letters), list(sample(11), runif(12)))  # one level of nesting
 84 | saveRDS(y, file="lists_nested.rds")
 85 | 
 86 | y <- list(list(2, 6), list(5, c("cat", "dog", "bouse"), list(sample(99), runif(20))))
 87 | saveRDS(y, file="lists_nested_deep.rds")  # lists nested two levels deep
 88 | 
 89 | df <- data.frame(xxx=runif(19), YYY=sample(letters, 19), ZZZ=rbinom(19, 1, 0.4) == 0)
 90 | saveRDS(df, file="lists_df.rds")  # 19 rows; numeric, character and logical columns
 91 | 
 92 | rownames(df) <- paste0("FOO-", LETTERS[1:19])  # same frame, now with explicit row names
 93 | saveRDS(df, file="lists_df_rownames.rds")
 94 | 
 95 | # S4 objects: sparse matrices from the Matrix package and a user-defined class
 96 | 
 97 | y <- Matrix::rsparsematrix(100, 10, 0.05)  # 100 x 10 random sparse matrix, density 0.05
 98 | saveRDS(y, file="s4_matrix.rds")
 99 | 
100 | rownames(y) <- paste("row", 1:nrow(y), sep="_")  # row names only
101 | saveRDS(y, file="matrix_with_row_names.rds")
102 | 
103 | colnames(y) <- paste("col", 1:ncol(y), sep="_")  # row and column names
104 | saveRDS(y, file="matrix_with_dim_names.rds")
105 | 
106 | setClass("FOO", slots=c(bar="integer"))  # minimal S4 class with one integer slot
107 | y <- new("FOO", bar=2L)
108 | saveRDS(y, file="s4_class.rds")
109 | 
110 | # GenomicRanges
111 | library(GenomicRanges)  # GRanges(); its dependencies supply IRanges() and Rle()
112 | gr <- GRanges(
113 |     seqnames = Rle(c("chr1", "chr2", "chr1", "chr3"), c(1, 3, 2, 4)),
114 |     ranges = IRanges(101:110, end = 111:120, names = head(letters, 10)),
115 |     strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
116 |     score = 1:10,
117 |     GC = seq(1, 0, length.out=10))
118 | 
119 | saveRDS(gr, file="granges.rds")
120 | 
121 | # factors
122 | 
123 | f1 <- factor(c("chr1", "chr2", "chr1", "chr3"))
124 | saveRDS(f1, "simple_factors.rds")
125 | 
126 | # Rle (run lengths 8:1 expand to 36 values)
127 | x2 <- Rle(LETTERS[c(21:26, 25:26)], 8:1)
128 | saveRDS(x2, "simple_rle.rds")
129 | 
130 | 
131 | # SummarizedExperiment: one plain and one ranged experiment built from the same counts
132 | library(SummarizedExperiment)  # also attaches GenomicRanges/S4Vectors (GRanges, IRanges, DataFrame)
133 | nrows <- 200
134 | ncols <- 6
135 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
136 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
137 |                      IRanges(floor(runif(nrows, 1e5, 1e6)), width=100),
138 |                      strand=sample(c("+", "-"), nrows, TRUE),
139 |                      feature_id=sprintf("ID%03d", seq_len(nrows)))
140 | rowd <- DataFrame(seqs = rep(c("chr1", "chr2"), c(50, 150)))
141 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
142 |                      row.names=LETTERS[1:6])
143 | 
144 | se <- SummarizedExperiment(assays=list(counts=counts),
145 |                      rowData = rowd, colData=colData)
146 | # supplying rowRanges makes the constructor return a RangedSummarizedExperiment
147 | rse <- SummarizedExperiment(assays=list(counts=counts),
148 |                             rowRanges = rowRanges, colData=colData)
149 | saveRDS(se, "sumexpt.rds")
150 | saveRDS(rse, "ranged_se.rds")
151 | 
152 | # SingleCellExperiment: a 100 x 100 corner of a dataset fetched through scRNAseq
153 | 
154 | library(scRNAseq)
155 | sce <- ReprocessedAllenData("tophat_counts")
156 | sce_subset <- sce[1:100, 1:100]  # keep the first 100 rows and columns so the fixture stays small
157 | saveRDS(sce_subset, "simple_sce.rds")
158 | 
159 | # Named lists, including a nested named list under `collab`
160 | 
161 | x <- list(github = "jkanche", fullname=c("Kancherla", "Jayaram"),
162 |           collab=list(github = "ltla", fullname=c("Lun", "Aaron")))
163 | saveRDS(x, "simple_list.rds")
164 | 
165 | # frames: `lists_df` was never defined here, so reload the frame saved to lists_df.rds earlier
166 | dframe <- as.data.frame(readRDS("lists_df.rds"))
167 | saveRDS(dframe, "data.frame.rds")
168 | 
169 | # MAE: a MultiAssayExperiment holding two small expression matrices plus per-patient metadata
170 | library(MultiAssayExperiment)
171 | patient.data <- data.frame(sex=c("M", "F", "M", "F"),
172 |                            age=38:41,
173 |                            row.names=c("Jack", "Jill", "Bob", "Barbara"))
174 | 
175 | exprss1 <- matrix(rnorm(16), ncol = 4,  # 4 x 4; column "Bobby" has no row in patient.data
176 |                   dimnames = list(sprintf("ENST00000%i", sample(288754:290000, 4)),
177 |                                   c("Jack", "Jill", "Bob", "Bobby")))
178 | exprss2 <- matrix(rnorm(12), ncol = 3,  # 4 x 3; column "Jane" has no row in patient.data
179 |                   dimnames = list(sprintf("ENST00000%i", sample(288754:290000, 4)),
180 |                                   c("Jack", "Jane", "Bob")))
181 | doubleExp <- list("methyl 2k"  = exprss1, "methyl 3k" = exprss2)
182 | simpleMultiAssay <- MultiAssayExperiment(experiments=doubleExp)  # built without colData; never saved
183 | simpleMultiAssay2 <- MultiAssayExperiment(experiments=doubleExp,
184 |                                           colData=patient.data)
185 | saveRDS(simpleMultiAssay2, "simple_mae.rds")  # only the variant with colData becomes a fixture
186 | 
187 | ## Delayed Arrays
188 | library(HDF5Array)  # h5ls() and H5SparseMatrix(); attaching zellkonverter alone does not expose them
189 | library(zellkonverter)
190 | h5ad_file <- system.file("extdata", "example_anndata.h5ad",
191 |                          package="zellkonverter")
192 | h5ls(h5ad_file)
193 | 
194 | M <- H5SparseMatrix(h5ad_file, "/obsp/connectivities")
195 | saveRDS(M, "h5sparse.rds")
196 | 


--------------------------------------------------------------------------------
/tests/data/granges.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/granges.rds


--------------------------------------------------------------------------------
/tests/data/grangeslist.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/grangeslist.rds


--------------------------------------------------------------------------------
/tests/data/h5sparse.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/h5sparse.rds


--------------------------------------------------------------------------------
/tests/data/lists.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists.rds


--------------------------------------------------------------------------------
/tests/data/lists_df.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_df.rds


--------------------------------------------------------------------------------
/tests/data/lists_df_rownames.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_df_rownames.rds


--------------------------------------------------------------------------------
/tests/data/lists_nested.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_nested.rds


--------------------------------------------------------------------------------
/tests/data/lists_nested_deep.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_nested_deep.rds


--------------------------------------------------------------------------------
/tests/data/matrix_with_dim_names.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/matrix_with_dim_names.rds


--------------------------------------------------------------------------------
/tests/data/matrix_with_row_names.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/matrix_with_row_names.rds


--------------------------------------------------------------------------------
/tests/data/numpy_dtype.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/numpy_dtype.rds


--------------------------------------------------------------------------------
/tests/data/ranged_se.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/ranged_se.rds


--------------------------------------------------------------------------------
/tests/data/s4_class.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_class.rds


--------------------------------------------------------------------------------
/tests/data/s4_dense_matrix.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_dense_matrix.rds


--------------------------------------------------------------------------------
/tests/data/s4_matrix.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_matrix.rds


--------------------------------------------------------------------------------
/tests/data/s4_matrix_dgt.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_matrix_dgt.rds


--------------------------------------------------------------------------------
/tests/data/scalar_int.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/scalar_int.rds


--------------------------------------------------------------------------------
/tests/data/simple_factors.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_factors.rds


--------------------------------------------------------------------------------
/tests/data/simple_list.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_list.rds


--------------------------------------------------------------------------------
/tests/data/simple_mae.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_mae.rds


--------------------------------------------------------------------------------
/tests/data/simple_rle.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_rle.rds


--------------------------------------------------------------------------------
/tests/data/simple_sce.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_sce.rds


--------------------------------------------------------------------------------
/tests/data/sumexpt.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/sumexpt.rds


--------------------------------------------------------------------------------
/tests/test_atomics.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from rds2py import read_rds
  4 | 
  5 | from biocutils import BooleanList, FloatList, IntegerList, StringList
  6 | 
  7 | __author__ = "jkanche"
  8 | __copyright__ = "jkanche"
  9 | __license__ = "MIT"
 10 | 
 11 | ## With attributes
 12 | 
 13 | 
 14 | def test_read_atomic_attrs():
 15 |     data = read_rds("tests/data/atomic_attr.rds")
 16 | 
 17 |     assert data is not None
 18 |     assert isinstance(data, dict)
 19 |     assert data["attributes"]["class"]["data"][0] == "frog"
 20 | 
 21 | 
 22 | ## Booleans
 23 | 
 24 | 
 25 | def test_read_atomic_logical():
 26 |     arr = read_rds("tests/data/atomic_logical.rds")
 27 | 
 28 |     assert arr is not None
 29 |     assert isinstance(arr, BooleanList)
 30 |     assert len(arr) > 0
 31 | 
 32 | 
 33 | def test_read_atomic_logical_na():
 34 |     arr = read_rds("tests/data/atomic_logical_wNA.rds")
 35 | 
 36 |     assert arr is not None
 37 |     assert isinstance(arr, BooleanList)
 38 |     assert len(arr) > 0
 39 | 
 40 | 
 41 | ## Doubles/Floats
 42 | 
 43 | 
 44 | def test_read_atomic_double():
 45 |     obj = read_rds("tests/data/atomic_double.rds")
 46 | 
 47 |     assert obj is not None
 48 |     assert isinstance(obj, FloatList)
 49 |     assert len(obj) == 99
 50 | 
 51 | 
 52 | ## Ints
 53 | 
 54 | 
 55 | def test_read_atomic_ints():
 56 |     arr = read_rds("tests/data/atomic_ints.rds")
 57 | 
 58 |     assert arr is not None
 59 |     assert isinstance(arr, IntegerList)
 60 |     assert len(arr) == 112
 61 |     assert arr.names is None
 62 | 
 63 | 
 64 | def test_read_atomic_ints_with_names():
 65 |     arr = read_rds("tests/data/atomic_ints_with_names.rds")
 66 | 
 67 |     assert arr is not None
 68 |     assert isinstance(arr, IntegerList)
 69 |     assert arr.names is not None
 70 |     assert len(arr) == 112
 71 | 
 72 | 
 73 | ## Strings
 74 | 
 75 | 
 76 | def test_read_atomic_chars():
 77 |     arr = read_rds("tests/data/atomic_chars.rds")
 78 | 
 79 |     assert arr is not None
 80 |     assert isinstance(arr, StringList)
 81 |     assert len(arr) == 26
 82 |     assert arr.names is None
 83 | 
 84 | 
 85 | def test_read_atomic_chars_unicode():
 86 |     arr = read_rds("tests/data/atomic_chars_unicode.rds")
 87 | 
 88 |     assert arr is not None
 89 |     assert isinstance(arr, StringList)
 90 |     assert len(arr) == 4
 91 |     assert arr.names is None
 92 | 
 93 | 
 94 | ## Test scalar values, defaults to a vector
 95 | 
 96 | 
 97 | def test_read_scalar_float():
 98 |     obj = read_rds("tests/data/scalar_int.rds")
 99 | 
100 |     assert obj is not None
101 |     assert isinstance(obj, FloatList)
102 |     assert len(obj) == 1
103 |     assert obj[0] == 10.0
104 | 


--------------------------------------------------------------------------------
/tests/test_delayedmatrices.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | from hdf5array import Hdf5CompressedSparseMatrix
 5 | 
 6 | __author__ = "jkanche"
 7 | __copyright__ = "jkanche"
 8 | __license__ = "MIT"
 9 | 
10 | @pytest.mark.skip(reason="delayedarray uses full file paths. this should be run locally.")
11 | def test_read_h5sparse():
12 |     array = read_rds("tests/data/h5sparse.rds")
13 | 
14 |     assert array is not None
15 |     assert isinstance(array, Hdf5CompressedSparseMatrix)
16 |     assert array.shape == (200, 200)
17 | 


--------------------------------------------------------------------------------
/tests/test_dict.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | __author__ = "jkanche"
 6 | __copyright__ = "jkanche"
 7 | __license__ = "MIT"
 8 | 
 9 | 
10 | def test_read_simple_lists():
11 |     obj = read_rds("tests/data/simple_list.rds")
12 | 
13 |     assert obj is not None
14 |     assert len(obj) > 0
15 | 
16 |     assert "collab" in obj
17 |     assert len(obj["collab"]) > 0
18 | 
19 | 
20 | def test_read_atomic_lists():
21 |     obj = read_rds("tests/data/lists.rds")
22 | 
23 |     assert obj is not None
24 |     assert len(obj) > 0
25 | 
26 | 
27 | def test_read_atomic_lists_nested():
28 |     obj = read_rds("tests/data/lists_nested.rds")
29 | 
30 |     assert obj is not None
31 |     assert len(obj) > 0
32 | 
33 | 
34 | def test_read_atomic_lists_nested_deep():
35 |     obj = read_rds("tests/data/lists_nested_deep.rds")
36 | 
37 |     assert obj is not None
38 |     assert len(obj) > 0
39 | 
40 | 
41 | def test_read_atomic_lists_df():
42 |     obj = read_rds("tests/data/lists_df.rds")
43 | 
44 |     assert obj is not None
45 |     assert len(obj) > 0
46 | 
47 | 
48 | def test_read_atomic_lists_nested_deep_rownames():
49 |     obj = read_rds("tests/data/lists_df_rownames.rds")
50 | 
51 |     assert obj is not None
52 |     assert len(obj) > 0
53 | 


--------------------------------------------------------------------------------
/tests/test_factors.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | __author__ = "jkanche"
 6 | __copyright__ = "jkanche"
 7 | __license__ = "MIT"
 8 | 
 9 | ## With attributes
10 | 
11 | 
12 | def test_read_simple_factors():
13 |     data = read_rds("tests/data/simple_factors.rds")
14 | 
15 |     assert data is not None
16 |     assert len(data) == 4
17 | 


--------------------------------------------------------------------------------
/tests/test_frames.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | from biocframe import BiocFrame
 5 | 
 6 | __author__ = "jkanche"
 7 | __copyright__ = "jkanche"
 8 | __license__ = "MIT"
 9 | 
10 | 
11 | def test_read_atomic_lists_df():
12 |     frame = read_rds("tests/data/lists_df.rds")
13 | 
14 |     assert frame is not None
15 |     assert isinstance(frame, BiocFrame)
16 |     assert len(frame) > 0
17 | 
18 | 
19 | def test_read_atomic_lists_nested_deep_rownames():
20 |     frame = read_rds("tests/data/lists_df_rownames.rds")
21 | 
22 |     assert frame is not None
23 |     assert isinstance(frame, BiocFrame)
24 |     assert len(frame) > 0
25 | 


--------------------------------------------------------------------------------
/tests/test_granges.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | from genomicranges import GenomicRanges, GenomicRangesList
 6 | import numpy as np
 7 | 
 8 | __author__ = "jkanche"
 9 | __copyright__ = "jkanche"
10 | __license__ = "MIT"
11 | 
12 | 
13 | def test_granges():
14 |     gr = read_rds("tests/data/granges.rds")
15 | 
16 |     assert isinstance(gr, GenomicRanges)
17 |     assert gr.get_seqnames("list") == [
18 |         "chr1",
19 |         "chr2",
20 |         "chr2",
21 |         "chr2",
22 |         "chr1",
23 |         "chr1",
24 |         "chr3",
25 |         "chr3",
26 |         "chr3",
27 |         "chr3",
28 |     ]
29 |     assert np.allclose(gr.get_start(), range(101, 111))
30 |     assert len(gr.get_mcols().get_column_names()) == 2
31 |     assert gr.get_strand("list") == ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"]
32 | 
33 | 
34 | def test_granges_list():
35 |     gr = read_rds("tests/data/grangeslist.rds")
36 | 
37 |     assert isinstance(gr, GenomicRangesList)
38 |     assert len(gr) == 5
39 | 


--------------------------------------------------------------------------------
/tests/test_mae.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | from multiassayexperiment import MultiAssayExperiment
 6 | 
 7 | __author__ = "jkanche"
 8 | __copyright__ = "jkanche"
 9 | __license__ = "MIT"
10 | 
11 | 
12 | def test_read_sce():
13 |     data = read_rds("tests/data/simple_mae.rds")
14 | 
15 |     assert data is not None
16 |     assert isinstance(data, MultiAssayExperiment)
17 |     assert len(data.get_experiment_names()) == 2
18 | 


--------------------------------------------------------------------------------
/tests/test_matrices.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | import numpy as np
 5 | from scipy import sparse as sp
 6 | 
 7 | from rds2py.read_matrix import MatrixWrapper
 8 | 
 9 | __author__ = "jkanche"
10 | __copyright__ = "jkanche"
11 | __license__ = "MIT"
12 | 
13 | 
14 | def test_read_s4_matrix_dgc():
15 |     array = read_rds("tests/data/s4_matrix.rds")
16 | 
17 |     assert array is not None
18 |     assert isinstance(array, sp.spmatrix)
19 | 
20 | def test_read_s4_matrix_dgc_with_rownames():
21 |     array = read_rds("tests/data/matrix_with_row_names.rds")
22 | 
23 |     assert array is not None
24 |     assert isinstance(array, MatrixWrapper)
25 |     assert len(array.dimnames[0]) == 100
26 |     assert array.dimnames[1] is None
27 | 
28 | 
29 | def test_read_s4_matrix_dgc_with_bothnames():
30 |     array = read_rds("tests/data/matrix_with_dim_names.rds")
31 | 
32 |     assert array is not None
33 |     assert isinstance(array, MatrixWrapper)
34 |     assert len(array.dimnames[0]) == 100
35 |     assert len(array.dimnames[1]) == 10
36 | 
37 | def test_read_s4_matrix_dgt():
38 |     array = read_rds("tests/data/s4_matrix_dgt.rds")
39 | 
40 |     assert array is not None
41 |     assert isinstance(array, sp.spmatrix)
42 | 
43 | 
44 | def test_read_dense_numpy_dtype():
45 |     array = read_rds("tests/data/numpy_dtype.rds")
46 | 
47 |     assert array is not None
48 |     assert isinstance(array, MatrixWrapper)
49 |     assert isinstance(array.matrix, np.ndarray)
50 |     assert array.dimnames is not None
51 |     assert len(array.dimnames) == len(array.matrix.shape)
52 | 


--------------------------------------------------------------------------------
/tests/test_rle.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | from biocutils import BooleanList, FloatList, IntegerList, StringList
 6 | 
 7 | __author__ = "jkanche"
 8 | __copyright__ = "jkanche"
 9 | __license__ = "MIT"
10 | 
11 | ## With attributes
12 | 
13 | 
14 | def test_read_simple_rle():
15 |     data = read_rds("tests/data/simple_rle.rds")
16 | 
17 |     assert data is not None
18 |     assert len(data) == 36
19 | 


--------------------------------------------------------------------------------
/tests/test_s4.py:
--------------------------------------------------------------------------------
 1 | # import pytest
 2 | 
 3 | from rds2py.PyRdsReader import PyRdsParser
 4 | 
 5 | # __author__ = "jkanche"
 6 | # __copyright__ = "jkanche"
 7 | # __license__ = "MIT"
 8 | 
 9 | 
def test_read_s4_class():
    """``PyRdsParser`` must return a non-``None`` result for ``s4_class.rds``."""
    fixture_path = "tests/data/s4_class.rds"
    parsed_result = PyRdsParser(fixture_path).parse()

    assert parsed_result is not None
15 | 
16 | 
def test_read_s4_matrix():
    """``PyRdsParser`` must return a non-``None`` result for ``s4_matrix.rds``."""
    fixture_path = "tests/data/s4_matrix.rds"
    parsed_result = PyRdsParser(fixture_path).parse()

    assert parsed_result is not None
22 | 
23 | 
def test_read_s4_matrix_dgt():
    """``PyRdsParser`` must return a non-``None`` result for ``s4_matrix_dgt.rds``."""
    fixture_path = "tests/data/s4_matrix_dgt.rds"
    parsed_result = PyRdsParser(fixture_path).parse()

    assert parsed_result is not None
29 | 


--------------------------------------------------------------------------------
/tests/test_sce.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | from singlecellexperiment import SingleCellExperiment
 6 | 
 7 | __author__ = "jkanche"
 8 | __copyright__ = "jkanche"
 9 | __license__ = "MIT"
10 | 
11 | 
def test_read_sce():
    """The ``simple_sce.rds`` fixture loads as a 100 x 100 ``SingleCellExperiment``."""
    fixture_path = "tests/data/simple_sce.rds"
    experiment = read_rds(fixture_path)

    assert experiment is not None
    assert isinstance(experiment, SingleCellExperiment)

    expected_shape = (100, 100)
    assert experiment.shape == expected_shape
18 | 


--------------------------------------------------------------------------------
/tests/test_se.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rds2py import read_rds
 4 | 
 5 | from summarizedexperiment import SummarizedExperiment, RangedSummarizedExperiment
 6 | 
 7 | __author__ = "jkanche"
 8 | __copyright__ = "jkanche"
 9 | __license__ = "MIT"
10 | 
11 | 
def test_read_summ_expt():
    """The ``sumexpt.rds`` fixture loads as a 200 x 6 ``SummarizedExperiment``."""
    fixture_path = "tests/data/sumexpt.rds"
    experiment = read_rds(fixture_path)

    assert experiment is not None
    assert isinstance(experiment, SummarizedExperiment)

    expected_shape = (200, 6)
    assert experiment.shape == expected_shape
18 | 
19 | 
def test_read_ranged_summ_expt():
    """The ``ranged_se.rds`` fixture loads as a 200 x 6 ``RangedSummarizedExperiment``."""
    fixture_path = "tests/data/ranged_se.rds"
    experiment = read_rds(fixture_path)

    assert experiment is not None
    assert isinstance(experiment, RangedSummarizedExperiment)

    expected_shape = (200, 6)
    assert experiment.shape == expected_shape
26 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # Tox configuration file
 2 | # Read more under https://tox.wiki/
 3 | # THIS SCRIPT IS SUPPOSED TO BE AN EXAMPLE. MODIFY IT ACCORDING TO YOUR NEEDS!
 4 | 
 5 | [tox]
 6 | minversion = 3.24
 7 | envlist = default
 8 | isolated_build = True
 9 | 
10 | 
11 | [testenv]
12 | description = Invoke pytest to run automated tests
13 | setenv =
14 |     TOXINIDIR = {toxinidir}
15 | passenv =
16 |     HOME
17 |     SETUPTOOLS_*
18 | extras =
19 |     testing
20 | commands =
21 |     pytest {posargs}
22 | 
23 | 
24 | # # To run `tox -e lint` you need to make sure you have a
25 | # # `.pre-commit-config.yaml` file. See https://pre-commit.com
26 | # [testenv:lint]
27 | # description = Perform static analysis and style checks
28 | # skip_install = True
29 | # deps = pre-commit
30 | # passenv =
31 | #     HOMEPATH
32 | #     PROGRAMDATA
33 | #     SETUPTOOLS_*
34 | # commands =
35 | #     pre-commit run --all-files {posargs:--show-diff-on-failure}
36 | 
37 | 
38 | [testenv:{build,clean}]
39 | description =
40 |     build: Build the package in isolation according to PEP517, see https://github.com/pypa/build
41 |     clean: Remove old distribution files and temporary build artifacts (./build and ./dist)
42 | # https://setuptools.pypa.io/en/stable/build_meta.html#how-to-use-it
43 | skip_install = True
44 | changedir = {toxinidir}
45 | deps =
46 |     build: build[virtualenv]
47 | passenv =
48 |     SETUPTOOLS_*
49 | commands =
50 |     clean: python -c 'import shutil; [shutil.rmtree(p, True) for p in ("build", "dist", "docs/_build")]'
51 |     clean: python -c 'import pathlib, shutil; [shutil.rmtree(p, True) for p in pathlib.Path("src").glob("*.egg-info")]'
52 |     build: python -m build {posargs}
53 | 
54 | 
55 | [testenv:{docs,doctests,linkcheck}]
56 | description =
57 |     docs: Invoke sphinx-build to build the docs
58 |     doctests: Invoke sphinx-build to run doctests
59 |     linkcheck: Check for broken links in the documentation
60 | passenv =
61 |     SETUPTOOLS_*
62 | setenv =
63 |     DOCSDIR = {toxinidir}/docs
64 |     BUILDDIR = {toxinidir}/docs/_build
65 |     docs: BUILD = html
66 |     doctests: BUILD = doctest
67 |     linkcheck: BUILD = linkcheck
68 | deps =
69 |     -r {toxinidir}/docs/requirements.txt
70 |     # ^  requirements.txt shared with Read The Docs
71 | commands =
72 |     sphinx-build --color -b {env:BUILD} -d "{env:BUILDDIR}/doctrees" "{env:DOCSDIR}" "{env:BUILDDIR}/{env:BUILD}" {posargs}
73 | 
74 | 
75 | [testenv:publish]
76 | description =
77 |     Publish the package you have been developing to a package index server.
78 |     By default, it uses testpypi. If you really want to publish your package
79 |     to be publicly accessible in PyPI, use the `-- --repository pypi` option.
80 | skip_install = True
81 | changedir = {toxinidir}
82 | passenv =
83 |     # See: https://twine.readthedocs.io/en/latest/
84 |     TWINE_USERNAME
85 |     TWINE_PASSWORD
86 |     TWINE_REPOSITORY
87 | deps = twine
88 | commands =
89 |     python -m twine check dist/*
90 |     python -m twine upload {posargs:--repository {env:TWINE_REPOSITORY:testpypi}} dist/*
91 | 


--------------------------------------------------------------------------------