├── .coveragerc ├── .github └── workflows │ ├── build-docs.yml │ ├── publish-pypi.yml │ └── run-tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── AUTHORS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── README.md ├── docs ├── Makefile ├── _static │ └── .gitignore ├── authors.md ├── changelog.md ├── conf.py ├── contributing.md ├── index.md ├── license.md ├── readme.md ├── requirements.txt └── tutorial.md ├── lib ├── CMakeLists.txt └── src │ └── rdswrapper.cpp ├── pyproject.toml ├── setup.cfg ├── setup.py ├── src └── rds2py │ ├── PyRdsReader.py │ ├── __init__.py │ ├── generics.py │ ├── rdsutils.py │ ├── read_atomic.py │ ├── read_delayed_matrix.py │ ├── read_dict.py │ ├── read_factor.py │ ├── read_frame.py │ ├── read_granges.py │ ├── read_mae.py │ ├── read_matrix.py │ ├── read_rle.py │ ├── read_sce.py │ └── read_se.py ├── tests ├── conftest.py ├── data │ ├── atomic_attr.rds │ ├── atomic_chars.rds │ ├── atomic_chars_unicode.rds │ ├── atomic_complex.rds │ ├── atomic_double.rds │ ├── atomic_ints.rds │ ├── atomic_ints_with_names.rds │ ├── atomic_logical.rds │ ├── atomic_logical_wNA.rds │ ├── atomic_raw.rds │ ├── data.frame.rds │ ├── example_anndata.h5ad │ ├── generate_files.R │ ├── granges.rds │ ├── grangeslist.rds │ ├── h5sparse.rds │ ├── lists.rds │ ├── lists_df.rds │ ├── lists_df_rownames.rds │ ├── lists_nested.rds │ ├── lists_nested_deep.rds │ ├── matrix_with_dim_names.rds │ ├── matrix_with_row_names.rds │ ├── numpy_dtype.rds │ ├── ranged_se.rds │ ├── s4_class.rds │ ├── s4_dense_matrix.rds │ ├── s4_matrix.rds │ ├── s4_matrix_dgt.rds │ ├── scalar_int.rds │ ├── simple_factors.rds │ ├── simple_list.rds │ ├── simple_mae.rds │ ├── simple_rle.rds │ ├── simple_sce.rds │ └── sumexpt.rds ├── test_atomics.py ├── test_delayedmatrices.py ├── test_dict.py ├── test_factors.py ├── test_frames.py ├── test_granges.py ├── test_mae.py ├── test_matrices.py ├── test_rle.py ├── test_s4.py ├── test_sce.py └── test_se.py └── tox.ini /.coveragerc: 
-------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | source = rds2py 5 | # omit = bad_file.py 6 | 7 | [paths] 8 | source = 9 | src/ 10 | */site-packages/ 11 | 12 | [report] 13 | # Regexes for lines to exclude from consideration 14 | exclude_lines = 15 | # Have to re-enable the standard pragma 16 | pragma: no cover 17 | 18 | # Don't complain about missing debug-only code: 19 | def __repr__ 20 | if self\.debug 21 | 22 | # Don't complain if tests don't hit defensive assertion code: 23 | raise AssertionError 24 | raise NotImplementedError 25 | 26 | # Don't complain if non-runnable code isn't run: 27 | if 0: 28 | if __name__ == .__main__.: 29 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | test: 10 | name: Build docs 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Set up Python 3.12 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: 3.12 20 | cache: 'pip' 21 | 22 | - name: Install Python dependencies 23 | run: | 24 | python -m pip install --upgrade pip setuptools 25 | pip install cmake pybind11 numpy tox 26 | 27 | - name: Build docs 28 | run: | 29 | python setup.py build_ext --inplace 30 | cp build/lib*/rds2py/lib_rds_parser* src/rds2py/ 31 | tox -e docs 32 | touch ./docs/_build/html/.nojekyll 33 | 34 | - name: GH Pages Deployment 35 | if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/') 36 | uses: JamesIves/github-pages-deploy-action@v4 37 | with: 38 | branch: gh-pages # The branch the action should deploy to. 
39 | folder: ./docs/_build/html 40 | clean: true # Automatically remove deleted files from the deploy branch 41 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | build_wheels: 10 | name: Build wheels on ${{ matrix.os }} 11 | 12 | runs-on: ${{ matrix.os }} 13 | 14 | strategy: 15 | matrix: 16 | # macos-13 is an intel runner, higher macos's are apple silicon 17 | # At some point, maybe get this to work on windows-latest 18 | os: [ubuntu-latest, macos-13, macos-latest] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | with: 23 | submodules: true 24 | 25 | - name: Build wheels 26 | uses: pypa/cibuildwheel@v2.22.0 27 | env: 28 | CIBW_ARCHS_LINUX: x86_64 # remove this later so we build for all linux archs 29 | CIBW_PROJECT_REQUIRES_PYTHON: ">=3.9" 30 | CIBW_SKIP: pp* 31 | 32 | - uses: actions/upload-artifact@v4 33 | with: 34 | name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} 35 | path: ./wheelhouse/*.whl 36 | 37 | build_sdist: 38 | name: Build source distribution 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | 43 | - name: Build sdist 44 | run: pipx run build --sdist 45 | 46 | - uses: actions/upload-artifact@v4 47 | with: 48 | name: cibw-sdist 49 | path: dist/*.tar.gz 50 | 51 | upload_pypi: 52 | needs: [build_wheels, build_sdist] 53 | runs-on: ubuntu-latest 54 | permissions: 55 | id-token: write 56 | repository-projects: write 57 | contents: write 58 | pages: write 59 | 60 | steps: 61 | - uses: actions/download-artifact@v4 62 | with: 63 | pattern: cibw-* 64 | path: dist 65 | merge-multiple: true 66 | 67 | # This uses the trusted publisher workflow so no token is required. 
68 | - name: Publish to PyPI 69 | uses: pypa/gh-action-pypi-publish@release/v1 70 | -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | name: Test the library 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ] 15 | 16 | name: Python ${{ matrix.python-version }} 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | submodules: true 21 | 22 | - name: Setup Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | cache: 'pip' 27 | 28 | - name: Get latest CMake 29 | uses: lukka/get-cmake@latest 30 | 31 | - name: Test with tox 32 | run: | 33 | pip install tox 34 | tox 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary and binary files 2 | *~ 3 | *.py[cod] 4 | *.so 5 | *.cfg 6 | !.isort.cfg 7 | !setup.cfg 8 | *.orig 9 | *.log 10 | *.pot 11 | __pycache__/* 12 | .cache/* 13 | .*.swp 14 | */.ipynb_checkpoints/* 15 | .DS_Store 16 | 17 | # Project files 18 | .ropeproject 19 | .project 20 | .pydevproject 21 | .settings 22 | .idea 23 | .vscode 24 | tags 25 | 26 | # Package files 27 | *.egg 28 | *.eggs/ 29 | .installed.cfg 30 | *.egg-info 31 | 32 | # Unittest and coverage 33 | htmlcov/* 34 | .coverage 35 | .coverage.* 36 | .tox 37 | junit*.xml 38 | coverage.xml 39 | .pytest_cache/ 40 | 41 | # Build and docs folder/files 42 | build/* 43 | dist/* 44 | sdist/* 45 | docs/api/* 46 | docs/_rst/* 47 | docs/_build/* 48 | cover/* 49 | MANIFEST 50 | 51 | # Per-project virtualenvs 52 | .venv*/ 53 | .conda*/ 54 | .python-version 55 | 56 | extern/rds2cpp* 57 | src/rds2py/lib/parser.cpp 58 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^docs/conf.py' 2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: check-added-large-files 9 | - id: check-ast 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-xml 13 | - id: check-yaml 14 | - id: debug-statements 15 | - id: end-of-file-fixer 16 | - id: requirements-txt-fixer 17 | - id: mixed-line-ending 18 | args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows 19 | 20 | # - repo: https://github.com/PyCQA/docformatter 21 | # rev: "v1.7.5" 22 | # hooks: 23 | # - id: docformatter 24 | # additional_dependencies: [tomli] 25 | # args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120] 26 | # # --config, ./pyproject.toml 27 | 28 | # - repo: https://github.com/psf/black 29 | # rev: 24.8.0 30 | # hooks: 31 | # - id: black 32 | # language_version: python3 33 | 34 | - repo: https://github.com/astral-sh/ruff-pre-commit 35 | # Ruff version. 36 | rev: v0.11.10 37 | hooks: 38 | - id: ruff 39 | args: [--fix, --exit-non-zero-on-fix] 40 | # Run the formatter. 
41 | - id: ruff-format 42 | 43 | ## If like to embrace black styles even in the docs: 44 | # - repo: https://github.com/asottile/blacken-docs 45 | # rev: v1.13.0 46 | # hooks: 47 | # - id: blacken-docs 48 | # additional_dependencies: [black] 49 | 50 | ## Check for misspells in documentation files: 51 | # - repo: https://github.com/codespell-project/codespell 52 | # rev: v2.2.5 53 | # hooks: 54 | # - id: codespell 55 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the docs/ directory with Sphinx 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | # Build documentation with MkDocs 12 | #mkdocs: 13 | # configuration: mkdocs.yml 14 | 15 | # Optionally build your docs in additional formats such as PDF 16 | formats: 17 | - pdf 18 | 19 | python: 20 | version: 3.8 21 | install: 22 | - requirements: docs/requirements.txt 23 | - {path: ., method: pip} 24 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | * Jayaram Kancherla [jayaram.kancherla@gmail.com](mailto:jayaram.kancherla@gmail.com) 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Version 0.7.0 - 0.7.3 4 | 5 | - All dependencies are now listed under optional, except for numpy and biocutils. 6 | - Pin the version of byteme. 7 | - Fix an issue when trying to access shape of the `MatrixWrapper` objects. 8 | - Fix bug sanitizing empty data frame like objects. 
9 | 10 | ## Version 0.6.1 11 | 12 | - Fix name of the attribute that contains names of dimensions in matrices. 13 | - Update relevant tests and generate new rds files to test matrix behavior. 14 | 15 | ## Version 0.6.0 16 | 17 | - chore: Remove Python 3.8 (EOL). 18 | - precommit: Replace docformatter with ruff's formatter. 19 | 20 | ## Version 0.5.1 21 | 22 | - Added parser for delayed sparse objects backed by H5 23 | 24 | ## Version 0.5.0 25 | 26 | - Complete overhaul of the codebase using pybind11 27 | - Streamlined readers for R data types 28 | - Updated API for all classes and methods 29 | - Updated documentation and tests. 30 | 31 | ## Version 0.4.5 32 | 33 | - Switch to pybind11 for implementing the bindings to rds2cpp. 34 | - Update tests, documentation and actions. 35 | - Fix github issue with showing incorrect package version on github pages. 36 | 37 | ## Version 0.4.4 38 | 39 | - Add methods to parse RDS files containing `GenomicRangesList` 40 | - Fix bug in reading strand information; mostly RLE vectors. 41 | - Update tests and documentation 42 | 43 | ## Version 0.4.0 - 0.4.3 44 | 45 | - Migrate to the new class implementations 46 | - Add reader for objects containing genomic ranges 47 | 48 | ## Version 0.3.0 49 | 50 | This release migrates the package to the more palatable Google Python style guide. A major modification to the package is with casing: all `camelCase` properties, methods, functions and parameters are now `snake_case`. 51 | 52 | In addition, docstrings and documentation have been updated to use sphinx's features of linking objects to their types. Sphinx now also documents private and special dunder methods (e.g. `__getitem__`, `__copy__` etc). Intersphinx has been updated to link to references from dependent packages. 53 | 54 | Configuration for flake8, ruff and black has been added to pyproject.toml and setup.cfg to be less annoying. 
55 | 56 | Finally, pyscaffold has been updated to use "myst-parser" as the markdown compiler instead of recommonmark. As part of the pyscaffold setup, one may use pre-commits to run some of the routine tasks of linting and formatting before every commit. While this is sometimes annoying and can be ignored with `--no-verify`, it brings some consistency to the code base. 57 | 58 | ## Version 0.1 59 | 60 | - First implementation 61 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ```{todo} THIS IS SUPPOSED TO BE AN EXAMPLE. MODIFY IT ACCORDING TO YOUR NEEDS! 2 | 3 | The document assumes you are using a source repository service that promotes a 4 | contribution model similar to [GitHub's fork and pull request workflow]. 5 | While this is true for the majority of services (like GitHub, GitLab, 6 | BitBucket), it might not be the case for private repositories (e.g., when 7 | using Gerrit). 8 | 9 | Also notice that the code examples might refer to GitHub URLs or the text 10 | might use GitHub specific terminology (e.g., *Pull Request* instead of *Merge 11 | Request*). 12 | 13 | Please make sure to check the document having these assumptions in mind 14 | and update things accordingly. 15 | ``` 16 | 17 | ```{todo} Provide the correct links/replacements at the bottom of the document. 18 | ``` 19 | 20 | ```{todo} You might want to have a look on [PyScaffold's contributor's guide], 21 | 22 | especially if your project is open source. The text should be very similar to 23 | this template, but there are a few extra contents that you might decide to 24 | also include, like mentioning labels of your issue tracker or automated 25 | releases. 26 | ``` 27 | 28 | # Contributing 29 | 30 | Welcome to `rds2py` contributor's guide. 
31 | 32 | This document focuses on getting any potential contributor familiarized with 33 | the development processes, but [other kinds of contributions] are also appreciated. 34 | 35 | If you are new to using [git] or have never collaborated in a project previously, 36 | please have a look at [contribution-guide.org]. Other resources are also 37 | listed in the excellent [guide created by FreeCodeCamp] [^contrib1]. 38 | 39 | Please notice, all users and contributors are expected to be **open, 40 | considerate, reasonable, and respectful**. When in doubt, 41 | [Python Software Foundation's Code of Conduct] is a good reference in terms of 42 | behavior guidelines. 43 | 44 | ## Issue Reports 45 | 46 | If you experience bugs or general issues with `rds2py`, please have a look 47 | at the [issue tracker]. 48 | If you don't see anything useful there, please feel free to file an issue report. 49 | 50 | :::{tip} 51 | Please don't forget to include the closed issues in your search. 52 | Sometimes a solution was already reported, and the problem is considered 53 | **solved**. 54 | ::: 55 | 56 | New issue reports should include information about your programming environment 57 | (e.g., operating system, Python version) and steps to reproduce the problem. 58 | Please try also to simplify the reproduction steps to a very minimal example 59 | that still illustrates the problem you are facing. By removing other factors, 60 | you help us to identify the root cause of the issue. 61 | 62 | ## Documentation Improvements 63 | 64 | You can help improve `rds2py` docs by making them more readable and coherent, or 65 | by adding missing information and correcting mistakes. 66 | 67 | `rds2py` documentation uses [Sphinx] as its main documentation compiler. 68 | This means that the docs are kept in the same repository as the project code, and 69 | that any documentation update is done in the same way as a code contribution. 
70 | 71 | ```{todo} Don't forget to mention which markup language you are using. 72 | 73 | e.g., [reStructuredText] or [CommonMark] with [MyST] extensions. 74 | ``` 75 | 76 | ```{todo} If your project is hosted on GitHub, you can also mention the following tip: 77 | 78 | :::{tip} 79 | Please notice that the [GitHub web interface] provides a quick way of 80 | propose changes in `rds2py`'s files. While this mechanism can 81 | be tricky for normal code contributions, it works perfectly fine for 82 | contributing to the docs, and can be quite handy. 83 | 84 | If you are interested in trying this method out, please navigate to 85 | the `docs` folder in the source [repository], find which file you 86 | would like to propose changes and click in the little pencil icon at the 87 | top, to open [GitHub's code editor]. Once you finish editing the file, 88 | please write a message in the form at the bottom of the page describing 89 | which changes have you made and what are the motivations behind them and 90 | submit your proposal. 91 | ::: 92 | ``` 93 | 94 | When working on documentation changes in your local machine, you can 95 | compile them using [tox] : 96 | 97 | ``` 98 | tox -e docs 99 | ``` 100 | 101 | and use Python's built-in web server for a preview in your web browser 102 | (`http://localhost:8000`): 103 | 104 | ``` 105 | python3 -m http.server --directory 'docs/_build/html' 106 | ``` 107 | 108 | ## Code Contributions 109 | 110 | ```{todo} Please include a reference or explanation about the internals of the project. 111 | 112 | An architecture description, design principles or at least a summary of the 113 | main concepts will make it easy for potential contributors to get started 114 | quickly. 115 | ``` 116 | 117 | ### Submit an issue 118 | 119 | Before you work on any non-trivial code contribution it's best to first create 120 | a report in the [issue tracker] to start a discussion on the subject. 
121 | This often provides additional considerations and avoids unnecessary work. 122 | 123 | ### Create an environment 124 | 125 | Before you start coding, we recommend creating an isolated [virtual environment] 126 | to avoid any problems with your installed Python packages. 127 | This can easily be done via either [virtualenv]: 128 | 129 | ``` 130 | virtualenv <PATH TO VENV> 131 | source <PATH TO VENV>/bin/activate 132 | ``` 133 | 134 | or [Miniconda]: 135 | 136 | ``` 137 | conda create -n rds2py python=3 six virtualenv pytest pytest-cov 138 | conda activate rds2py 139 | ``` 140 | 141 | ### Clone the repository 142 | 143 | 1. Create a user account on GitHub if you do not already have one. 144 | 145 | 2. Fork the project [repository]: click on the *Fork* button near the top of the 146 | page. This creates a copy of the code under your account on GitHub. 147 | 148 | 3. Clone this copy to your local disk: 149 | 150 | ``` 151 | git clone git@github.com:YourLogin/rds2py.git 152 | cd rds2py 153 | ``` 154 | 155 | 4. You should run: 156 | 157 | ``` 158 | pip install -U pip setuptools -e . 159 | ``` 160 | 161 | to be able to import the package under development in the Python REPL. 162 | 163 | ```{todo} if you are not using pre-commit, please remove the following item: 164 | ``` 165 | 166 | 5. Install [pre-commit]: 167 | 168 | ``` 169 | pip install pre-commit 170 | pre-commit install 171 | ``` 172 | 173 | `rds2py` comes with a lot of hooks configured to automatically help the 174 | developer to check the code being written. 175 | 176 | ### Implement your changes 177 | 178 | 1. Create a branch to hold your changes: 179 | 180 | ``` 181 | git checkout -b my-feature 182 | ``` 183 | 184 | and start making changes. Never work on the main branch! 185 | 186 | 2. Start your work on this branch. Don't forget to add [docstrings] to new 187 | functions, modules and classes, especially if they are part of public APIs. 188 | 189 | 3. Add yourself to the list of contributors in `AUTHORS.md`. 190 | 191 | 4. 
When you’re done editing, do: 192 | 193 | ``` 194 | git add 195 | git commit 196 | ``` 197 | 198 | to record your changes in [git]. 199 | 200 | ```{todo} if you are not using pre-commit, please remove the following item: 201 | ``` 202 | 203 | Please make sure to see the validation messages from [pre-commit] and fix 204 | any eventual issues. 205 | This should automatically use [flake8]/[black] to check/fix the code style 206 | in a way that is compatible with the project. 207 | 208 | :::{important} 209 | Don't forget to add unit tests and documentation in case your 210 | contribution adds an additional feature and is not just a bugfix. 211 | 212 | Moreover, writing a [descriptive commit message] is highly recommended. 213 | In case of doubt, you can check the commit history with: 214 | 215 | ``` 216 | git log --graph --decorate --pretty=oneline --abbrev-commit --all 217 | ``` 218 | 219 | to look for recurring communication patterns. 220 | ::: 221 | 222 | 5. Please check that your changes don't break any unit tests with: 223 | 224 | ``` 225 | tox 226 | ``` 227 | 228 | (after having installed [tox] with `pip install tox` or `pipx`). 229 | 230 | You can also use [tox] to run several other pre-configured tasks in the 231 | repository. Try `tox -av` to see a list of the available checks. 232 | 233 | ### Submit your contribution 234 | 235 | 1. If everything works fine, push your local branch to the remote server with: 236 | 237 | ``` 238 | git push -u origin my-feature 239 | ``` 240 | 241 | 2. Go to the web page of your fork and click "Create pull request" 242 | to send your changes for review. 243 | 244 | ```{todo} if you are using GitHub, you can uncomment the following paragraph 245 | 246 | Find more detailed information in [creating a PR]. You might also want to open 247 | the PR as a draft first and mark it as ready for review after the feedbacks 248 | from the continuous integration (CI) system or any required fixes. 
249 | 250 | ``` 251 | 252 | ### Troubleshooting 253 | 254 | The following tips can be used when facing problems building or testing the 255 | package: 256 | 257 | 1. Make sure to fetch all the tags from the upstream [repository]. 258 | The command `git describe --abbrev=0 --tags` should return the version you 259 | are expecting. If you are trying to run CI scripts in a fork repository, 260 | make sure to push all the tags. 261 | You can also try to remove all the egg files or the complete egg folder, i.e., 262 | `.eggs`, as well as the `*.egg-info` folders in the `src` folder or 263 | potentially in the root of your project. 264 | 265 | 2. Sometimes [tox] misses out when new dependencies are added, especially to 266 | `setup.cfg` and `docs/requirements.txt`. If you find any problems with 267 | missing dependencies when running a command with [tox], try to recreate the 268 | `tox` environment using the `-r` flag. For example, instead of: 269 | 270 | ``` 271 | tox -e docs 272 | ``` 273 | 274 | Try running: 275 | 276 | ``` 277 | tox -r -e docs 278 | ``` 279 | 280 | 3. Make sure to have a reliable [tox] installation that uses the correct 281 | Python version (e.g., 3.9+). When in doubt you can run: 282 | 283 | ``` 284 | tox --version 285 | # OR 286 | which tox 287 | ``` 288 | 289 | If you have trouble and are seeing weird errors upon running [tox], you can 290 | also try to create a dedicated [virtual environment] with a [tox] binary 291 | freshly installed. For example: 292 | 293 | ``` 294 | virtualenv .venv 295 | source .venv/bin/activate 296 | .venv/bin/pip install tox 297 | .venv/bin/tox -e all 298 | ``` 299 | 300 | 4. [Pytest can drop you] in an interactive session in case an error occurs. 301 | In order to do that you need to pass a `--pdb` option (for example by 302 | running `tox -- -k <NAME OF THE FAILING TEST> --pdb`). 303 | You can also set up breakpoints manually instead of using the `--pdb` option. 
304 | 305 | ## Maintainer tasks 306 | 307 | ### Releases 308 | 309 | ```{todo} This section assumes you are using PyPI to publicly release your package. 310 | 311 | If instead you are using a different/private package index, please update 312 | the instructions accordingly. 313 | ``` 314 | 315 | If you are part of the group of maintainers and have correct user permissions 316 | on [PyPI], the following steps can be used to release a new version for 317 | `rds2py`: 318 | 319 | 1. Make sure all unit tests are successful. 320 | 2. Tag the current commit on the main branch with a release tag, e.g., `v1.2.3`. 321 | 3. Push the new tag to the upstream [repository], 322 | e.g., `git push upstream v1.2.3` 323 | 4. Clean up the `dist` and `build` folders with `tox -e clean` 324 | (or `rm -rf dist build`) 325 | to avoid confusion with old builds and Sphinx docs. 326 | 5. Run `tox -e build` and check that the files in `dist` have 327 | the correct version (no `.dirty` or [git] hash) according to the [git] tag. 328 | Also check the sizes of the distributions, if they are too big (e.g., > 329 | 500KB), unwanted clutter may have been accidentally included. 330 | 6. Run `tox -e publish -- --repository pypi` and check that everything was 331 | uploaded to [PyPI] correctly. 332 | 333 | [^contrib1]: Even though, these resources focus on open source projects and 334 | communities, the general ideas behind collaborating with other developers 335 | to collectively create software are general and can be applied to all sorts 336 | of environments, including private companies and proprietary code bases. 
337 | 338 | 339 | [black]: https://pypi.org/project/black/ 340 | [commonmark]: https://commonmark.org/ 341 | [contribution-guide.org]: http://www.contribution-guide.org/ 342 | [creating a pr]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request 343 | [descriptive commit message]: https://chris.beams.io/posts/git-commit 344 | [docstrings]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html 345 | [first-contributions tutorial]: https://github.com/firstcontributions/first-contributions 346 | [flake8]: https://flake8.pycqa.org/en/stable/ 347 | [git]: https://git-scm.com 348 | [github web interface]: https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/editing-files-in-your-repository 349 | [github's code editor]: https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/editing-files-in-your-repository 350 | [github's fork and pull request workflow]: https://guides.github.com/activities/forking/ 351 | [guide created by freecodecamp]: https://github.com/freecodecamp/how-to-contribute-to-open-source 352 | [miniconda]: https://docs.conda.io/en/latest/miniconda.html 353 | [myst]: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html 354 | [other kinds of contributions]: https://opensource.guide/how-to-contribute 355 | [pre-commit]: https://pre-commit.com/ 356 | [pypi]: https://pypi.org/ 357 | [pyscaffold's contributor's guide]: https://pyscaffold.org/en/stable/contributing.html 358 | [pytest can drop you]: https://docs.pytest.org/en/stable/usage.html#dropping-to-pdb-python-debugger-at-the-start-of-a-test 359 | [python software foundation's code of conduct]: https://www.python.org/psf/conduct/ 360 | [restructuredtext]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/ 361 | [sphinx]: https://www.sphinx-doc.org/en/master/ 362 | [tox]: https://tox.readthedocs.io/en/stable/ 363 
| [virtual environment]: https://realpython.com/python-virtual-environments-a-primer/ 364 | [virtualenv]: https://virtualenv.pypa.io/en/stable/ 365 | 366 | 367 | ```{todo} Please review and change the following definitions: 368 | ``` 369 | 370 | [repository]: https://github.com/BiocPy/rds2py 371 | [issue tracker]: https://github.com/BiocPy/rds2py/issues 372 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 Genentech, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/) 2 | [![PyPI-Server](https://img.shields.io/pypi/v/rds2py.svg)](https://pypi.org/project/rds2py/) 3 | ![Unit tests](https://github.com/BiocPy/rds2py/actions/workflows/run-tests.yml/badge.svg) 4 | 5 | # rds2py 6 | 7 | Parse and construct Python representations for datasets stored in RDS files. `rds2py` supports various base classes from R, and Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` S4 classes. **_For more details, check out [rds2cpp library](https://github.com/LTLA/rds2cpp)._** 8 | 9 | --- 10 | 11 | **Version 0.5.0** brings major changes to the package, 12 | 13 | - Complete overhaul of the codebase using pybind11 14 | - Streamlined readers for R data types 15 | - Updated API for all classes and methods 16 | 17 | Please refer to the [documentation](https://biocpy.github.io/rds2py/) for the latest usage guidelines. Previous versions may have incompatible APIs. 
18 | 19 | --- 20 | 21 | The package provides: 22 | 23 | - Efficient parsing of RDS files with _minimal_ memory overhead 24 | - Support for R's basic data types and complex S4 objects 25 | - Vectors (numeric, character, logical) 26 | - Factors 27 | - Data frames 28 | - Matrices (dense and sparse) 29 | - Run-length encoded vectors (Rle) 30 | - Conversion to appropriate Python/NumPy/SciPy data structures 31 | - dgCMatrix (sparse column matrix) 32 | - dgRMatrix (sparse row matrix) 33 | - dgTMatrix (sparse triplet matrix) 34 | - Preservation of metadata and attributes from R objects 35 | - Integration with BiocPy ecosystem for Bioconductor classes 36 | - SummarizedExperiment 37 | - RangedSummarizedExperiment 38 | - SingleCellExperiment 39 | - GenomicRanges 40 | - MultiAssayExperiment 41 | 42 | ## Installation 43 | 44 | The package is published to [PyPI](https://pypi.org/project/rds2py/): 45 | 46 | ```shell 47 | pip install rds2py 48 | 49 | # or install optional dependencies 50 | pip install rds2py[optional] 51 | ``` 52 | 53 | By default, the package does not install packages to convert python representations to BiocPy classes. Please consider installing all optional dependencies. 54 | 55 | ## Usage 56 | 57 | If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases). 58 | 59 | ```python 60 | from rds2py import read_rds 61 | r_obj = read_rds("path/to/file.rds") 62 | ``` 63 | 64 | The returned `r_obj` is either an instance of an appropriate Python class, if a parser is already implemented, or a dictionary containing the data from the RDS file. 65 | 66 | In addition, the package provides `parse_rds`, which returns the dictionary representation of the RDS file. 
67 | 68 | ```python 69 | from rds2py import parse_rds 70 | 71 | robject_dict = parse_rds("path/to/file.rds") 72 | print(robject_dict) 73 | ``` 74 | 75 | ### Write-your-own-reader 76 | 77 | Reading RDS files as dictionary representations allows users to write their own custom readers into appropriate Python representations. 78 | 79 | ```python 80 | from rds2py import parse_rds 81 | 82 | robject = parse_rds("path/to/file.rds") 83 | print(robject) 84 | ``` 85 | 86 | If you know this RDS file contains a `GenomicRanges` object, you can use the built-in reader or write your own reader to convert this dictionary. 87 | 88 | ```python 89 | from rds2py.read_granges import read_genomic_ranges 90 | 91 | gr = read_genomic_ranges(robject) 92 | print(gr) 93 | ``` 94 | 95 | ## Type Conversion Reference 96 | 97 | | R Type | Python/NumPy Type | 98 | | ---------- | ------------------------------------ | 99 | | numeric | numpy.ndarray (float64) | 100 | | integer | numpy.ndarray (int32) | 101 | | character | list of str | 102 | | logical | numpy.ndarray (bool) | 103 | | factor | list | 104 | | data.frame | BiocFrame | 105 | | matrix | numpy.ndarray or scipy.sparse matrix | 106 | | dgCMatrix | scipy.sparse.csc_matrix | 107 | | dgRMatrix | scipy.sparse.csr_matrix | 108 | 109 | ## Developer Notes 110 | 111 | This project uses pybind11 to provide bindings to the rds2cpp library. Please make sure a C++ compiler is installed on your system. 112 | 113 | 114 | 115 | ## Note 116 | 117 | This project has been set up using PyScaffold 4.5. For details and usage 118 | information on PyScaffold see https://pyscaffold.org/. 119 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | AUTODOCDIR = api 11 | 12 | # User-friendly check for sphinx-build 13 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $?), 1) 14 | $(error "The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from https://sphinx-doc.org/") 15 | endif 16 | 17 | .PHONY: help clean Makefile 18 | 19 | # Put it first so that "make" without argument is like "make help". 20 | help: 21 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | clean: 24 | rm -rf $(BUILDDIR)/* $(AUTODOCDIR) 25 | 26 | # Catch-all target: route all unknown targets to Sphinx using the new 27 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
28 | %: Makefile 29 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 30 | -------------------------------------------------------------------------------- /docs/_static/.gitignore: -------------------------------------------------------------------------------- 1 | # Empty directory 2 | -------------------------------------------------------------------------------- /docs/authors.md: -------------------------------------------------------------------------------- 1 | ```{include} ../AUTHORS.md 2 | :relative-docs: docs/ 3 | :relative-images: 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | ```{include} ../CHANGELOG.md 2 | :relative-docs: docs/ 3 | :relative-images: 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # This file is execfile()d with the current directory set to its containing dir. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | # 7 | # All configuration values have a default; values that are commented out 8 | # serve to show the default. 9 | 10 | import os 11 | import sys 12 | import shutil 13 | 14 | # -- Path setup -------------------------------------------------------------- 15 | 16 | __location__ = os.path.dirname(__file__) 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
21 | sys.path.insert(0, os.path.join(__location__, "../src")) 22 | 23 | # -- Run sphinx-apidoc ------------------------------------------------------- 24 | # This hack is necessary since RTD does not issue `sphinx-apidoc` before running 25 | # `sphinx-build -b html . _build/html`. See Issue: 26 | # https://github.com/readthedocs/readthedocs.org/issues/1139 27 | # DON'T FORGET: Check the box "Install your project inside a virtualenv using 28 | # setup.py install" in the RTD Advanced Settings. 29 | # Additionally it helps us to avoid running apidoc manually 30 | 31 | try: # for Sphinx >= 1.7 32 | from sphinx.ext import apidoc 33 | except ImportError: 34 | from sphinx import apidoc 35 | 36 | output_dir = os.path.join(__location__, "api") 37 | module_dir = os.path.join(__location__, "../src/rds2py") 38 | try: 39 | shutil.rmtree(output_dir) 40 | except FileNotFoundError: 41 | pass 42 | 43 | try: 44 | import sphinx 45 | 46 | cmd_line = f"sphinx-apidoc --implicit-namespaces -f -o {output_dir} {module_dir}" 47 | 48 | args = cmd_line.split(" ") 49 | if tuple(sphinx.__version__.split(".")) >= ("1", "7"): 50 | # This is a rudimentary parse_version to avoid external dependencies 51 | args = args[1:] 52 | 53 | apidoc.main(args) 54 | except Exception as e: 55 | print("Running `sphinx-apidoc` failed!\n{}".format(e)) 56 | 57 | # -- General configuration --------------------------------------------------- 58 | 59 | # If your documentation needs a minimal Sphinx version, state it here. 60 | # needs_sphinx = '1.0' 61 | 62 | # Add any Sphinx extension module names here, as strings. They can be extensions 63 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
64 | extensions = [ 65 | "sphinx.ext.autodoc", 66 | "sphinx.ext.intersphinx", 67 | "sphinx.ext.todo", 68 | "sphinx.ext.autosummary", 69 | "sphinx.ext.viewcode", 70 | "sphinx.ext.coverage", 71 | "sphinx.ext.doctest", 72 | "sphinx.ext.ifconfig", 73 | "sphinx.ext.mathjax", 74 | "sphinx.ext.napoleon", 75 | "sphinx_autodoc_typehints", 76 | ] 77 | 78 | # Add any paths that contain templates here, relative to this directory. 79 | templates_path = ["_templates"] 80 | 81 | 82 | # Enable markdown 83 | extensions.append("myst_parser") 84 | 85 | # Configure MyST-Parser 86 | myst_enable_extensions = [ 87 | "amsmath", 88 | "colon_fence", 89 | "deflist", 90 | "dollarmath", 91 | "html_image", 92 | "linkify", 93 | "replacements", 94 | "smartquotes", 95 | "substitution", 96 | "tasklist", 97 | ] 98 | 99 | # The suffix of source filenames. 100 | source_suffix = [".rst", ".md"] 101 | 102 | # The encoding of source files. 103 | # source_encoding = 'utf-8-sig' 104 | 105 | # The master toctree document. 106 | master_doc = "index" 107 | 108 | # General information about the project. 109 | project = "rds2py" 110 | copyright = "2023, jkanche" 111 | 112 | # The version info for the project you're documenting, acts as replacement for 113 | # |version| and |release|, also used in various other places throughout the 114 | # built documents. 115 | # 116 | # version: The short X.Y version. 117 | # release: The full version, including alpha/beta/rc tags. 118 | # If you don’t need the separation provided between version and release, 119 | # just set them both to the same value. 120 | try: 121 | from rds2py import __version__ as version 122 | except ImportError: 123 | version = "" 124 | 125 | if not version or version.lower() == "unknown": 126 | version = os.getenv("READTHEDOCS_VERSION", "unknown") # automatically set by RTD 127 | 128 | release = version 129 | 130 | # The language for content autogenerated by Sphinx. Refer to documentation 131 | # for a list of supported languages. 
132 | # language = None 133 | 134 | # There are two options for replacing |today|: either, you set today to some 135 | # non-false value, then it is used: 136 | # today = '' 137 | # Else, today_fmt is used as the format for a strftime call. 138 | # today_fmt = '%B %d, %Y' 139 | 140 | # List of patterns, relative to source directory, that match files and 141 | # directories to ignore when looking for source files. 142 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ".venv"] 143 | 144 | # The reST default role (used for this markup: `text`) to use for all documents. 145 | # default_role = None 146 | 147 | # If true, '()' will be appended to :func: etc. cross-reference text. 148 | # add_function_parentheses = True 149 | 150 | # If true, the current module name will be prepended to all description 151 | # unit titles (such as .. function::). 152 | # add_module_names = True 153 | 154 | # If true, sectionauthor and moduleauthor directives will be shown in the 155 | # output. They are ignored by default. 156 | # show_authors = False 157 | 158 | # The name of the Pygments (syntax highlighting) style to use. 159 | pygments_style = "sphinx" 160 | 161 | # A list of ignored prefixes for module index sorting. 162 | # modindex_common_prefix = [] 163 | 164 | # If true, keep warnings as "system message" paragraphs in the built documents. 165 | # keep_warnings = False 166 | 167 | # If this is True, todo emits a warning for each TODO entries. The default is False. 168 | todo_emit_warnings = True 169 | 170 | autodoc_default_options = { 171 | 'special-members': True, 172 | 'undoc-members': False, 173 | 'exclude-members': '__weakref__, __dict__, __str__, __module__, __init__' 174 | } 175 | 176 | autosummary_generate = True 177 | autosummary_imported_members = True 178 | 179 | 180 | # -- Options for HTML output ------------------------------------------------- 181 | 182 | # The theme to use for HTML and HTML Help pages. 
See the documentation for 183 | # a list of builtin themes. 184 | html_theme = "furo" 185 | 186 | # Theme options are theme-specific and customize the look and feel of a theme 187 | # further. For a list of options available for each theme, see the 188 | # documentation. 189 | html_theme_options = { 190 | "sidebar_width": "300px", 191 | "page_width": "1200px" 192 | } 193 | 194 | # Add any paths that contain custom themes here, relative to this directory. 195 | # html_theme_path = [] 196 | 197 | # The name for this set of Sphinx documents. If None, it defaults to 198 | # " v documentation". 199 | # html_title = None 200 | 201 | # A shorter title for the navigation bar. Default is the same as html_title. 202 | # html_short_title = None 203 | 204 | # The name of an image file (relative to this directory) to place at the top 205 | # of the sidebar. 206 | # html_logo = "" 207 | 208 | # The name of an image file (within the static path) to use as favicon of the 209 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 210 | # pixels large. 211 | # html_favicon = None 212 | 213 | # Add any paths that contain custom static files (such as style sheets) here, 214 | # relative to this directory. They are copied after the builtin static files, 215 | # so a file named "default.css" will overwrite the builtin "default.css". 216 | html_static_path = ["_static"] 217 | 218 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 219 | # using the given strftime format. 220 | # html_last_updated_fmt = '%b %d, %Y' 221 | 222 | # If true, SmartyPants will be used to convert quotes and dashes to 223 | # typographically correct entities. 224 | # html_use_smartypants = True 225 | 226 | # Custom sidebar templates, maps document names to template names. 227 | # html_sidebars = {} 228 | 229 | # Additional templates that should be rendered to pages, maps page names to 230 | # template names. 
231 | # html_additional_pages = {} 232 | 233 | # If false, no module index is generated. 234 | # html_domain_indices = True 235 | 236 | # If false, no index is generated. 237 | # html_use_index = True 238 | 239 | # If true, the index is split into individual pages for each letter. 240 | # html_split_index = False 241 | 242 | # If true, links to the reST sources are added to the pages. 243 | # html_show_sourcelink = True 244 | 245 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 246 | # html_show_sphinx = True 247 | 248 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 249 | # html_show_copyright = True 250 | 251 | # If true, an OpenSearch description file will be output, and all pages will 252 | # contain a tag referring to it. The value of this option must be the 253 | # base URL from which the finished HTML is served. 254 | # html_use_opensearch = '' 255 | 256 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 257 | # html_file_suffix = None 258 | 259 | # Output file base name for HTML help builder. 260 | htmlhelp_basename = "rds2py-doc" 261 | 262 | 263 | # -- Options for LaTeX output ------------------------------------------------ 264 | 265 | latex_elements = { 266 | # The paper size ("letterpaper" or "a4paper"). 267 | # "papersize": "letterpaper", 268 | # The font size ("10pt", "11pt" or "12pt"). 269 | # "pointsize": "10pt", 270 | # Additional stuff for the LaTeX preamble. 271 | # "preamble": "", 272 | } 273 | 274 | # Grouping the document tree into LaTeX files. List of tuples 275 | # (source start file, target name, title, author, documentclass [howto/manual]). 276 | latex_documents = [ 277 | ("index", "user_guide.tex", "rds2py Documentation", "jkanche", "manual") 278 | ] 279 | 280 | # The name of an image file (relative to this directory) to place at the top of 281 | # the title page. 
282 | # latex_logo = "" 283 | 284 | # For "manual" documents, if this is true, then toplevel headings are parts, 285 | # not chapters. 286 | # latex_use_parts = False 287 | 288 | # If true, show page references after internal links. 289 | # latex_show_pagerefs = False 290 | 291 | # If true, show URL addresses after external links. 292 | # latex_show_urls = False 293 | 294 | # Documents to append as an appendix to all manuals. 295 | # latex_appendices = [] 296 | 297 | # If false, no module index is generated. 298 | # latex_domain_indices = True 299 | 300 | # -- External mapping -------------------------------------------------------- 301 | python_version = ".".join(map(str, sys.version_info[0:2])) 302 | intersphinx_mapping = { 303 | "sphinx": ("https://www.sphinx-doc.org/en/master", None), 304 | "python": ("https://docs.python.org/" + python_version, None), 305 | "matplotlib": ("https://matplotlib.org", None), 306 | "numpy": ("https://numpy.org/doc/stable", None), 307 | "sklearn": ("https://scikit-learn.org/stable", None), 308 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), 309 | "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), 310 | "setuptools": ("https://setuptools.pypa.io/en/stable/", None), 311 | "pyscaffold": ("https://pyscaffold.org/en/stable", None), 312 | "singelcellexperiment": ("https://biocpy.github.io/SingleCellExperiment", None), 313 | "summarizedexperiment": ("https://biocpy.github.io/SummarizedExperiment", None), 314 | } 315 | 316 | print(f"loading configurations for {project} {version} ...", file=sys.stderr) -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | ```{include} ../CONTRIBUTING.md 2 | :relative-docs: docs/ 3 | :relative-images: 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/index.md: 
-------------------------------------------------------------------------------- 1 | # rds2py 2 | 3 | Parse, extract and create Python representations for datasets stored in RDS files. It supports Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` objects. This is possible because of [Aaron's rds2cpp library](https://github.com/LTLA/rds2cpp). 4 | 5 | The package uses memory views (except for strings) so that we can access the same memory from C++ space in Python (through pybind11). This is especially useful for large datasets so we don't make copies of data. 6 | 7 | ## Install 8 | 9 | Package is published to [PyPI](https://pypi.org/project/rds2py/) 10 | 11 | ```shell 12 | pip install rds2py 13 | ``` 14 | 15 | ## Contents 16 | 17 | ```{toctree} 18 | :maxdepth: 2 19 | 20 | Overview 21 | Tutorial 22 | Contributions & Help 23 | License 24 | Authors 25 | Changelog 26 | Module Reference 27 | ``` 28 | 29 | ## Indices and tables 30 | 31 | * {ref}`genindex` 32 | * {ref}`modindex` 33 | * {ref}`search` 34 | 35 | [Sphinx]: http://www.sphinx-doc.org/ 36 | [Markdown]: https://daringfireball.net/projects/markdown/ 37 | [reStructuredText]: http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html 38 | [MyST]: https://myst-parser.readthedocs.io/en/latest/ 39 | -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | ```{literalinclude} ../LICENSE.txt 4 | :language: text 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | :relative-docs: docs/ 3 | :relative-images: 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/requirements.txt: 
-------------------------------------------------------------------------------- 1 | furo 2 | # Requirements file for ReadTheDocs, check .readthedocs.yml. 3 | # To build the module reference correctly, make sure every external package 4 | # under `install_requires` in `setup.cfg` is also listed here! 5 | # sphinx_rtd_theme 6 | myst-parser[linkify] 7 | sphinx>=3.2.1 8 | sphinx-autodoc-typehints 9 | -------------------------------------------------------------------------------- /docs/tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | 3 | If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases). 4 | 5 | ## Basic Usage 6 | 7 | ```python 8 | from rds2py import read_rds 9 | r_obj = read_rds("path/to/file.rds") 10 | ``` 11 | 12 | The returned `r_obj` is an instance of an appropriate Python class if a parser is already implemented; otherwise, it is the dictionary containing the data from the RDS file. 13 | 14 | ## Write-your-own-reader 15 | 16 | In addition, the package provides the dictionary representation of the RDS file, allowing users to write their own custom readers into appropriate Python representations. 17 | 18 | ```python 19 | from rds2py import parse_rds 20 | 21 | data = parse_rds("path/to/file.rds") 22 | print(data) 23 | 24 | # now write your own parser to convert this dictionary. 
25 | ``` 26 | 27 | ## Type Conversion Reference 28 | 29 | | R Type | Python/NumPy Type | 30 | |--------|------------------| 31 | | numeric | numpy.ndarray (float64) | 32 | | integer | numpy.ndarray (int32) | 33 | | character | list of str | 34 | | logical | numpy.ndarray (bool) | 35 | | factor | list | 36 | | data.frame | BiocFrame | 37 | | matrix | numpy.ndarray or scipy.sparse matrix | 38 | | dgCMatrix | scipy.sparse.csc_matrix | 39 | | dgRMatrix | scipy.sparse.csr_matrix | 40 | 41 | Check out the module reference for more information on these classes. 42 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.24) 2 | 3 | project(rds2py 4 | VERSION 1.0.0 5 | DESCRIPTION "Building the rds shared library" 6 | LANGUAGES CXX) 7 | 8 | # Importing all of the dependencies with pinned versions (even for transitive dependencies). 9 | include(FetchContent) 10 | 11 | FetchContent_Declare( 12 | rds2cpp 13 | GIT_REPOSITORY https://github.com/LTLA/rds2cpp 14 | GIT_TAG v1.1.0 15 | ) 16 | 17 | FetchContent_Declare( 18 | byteme 19 | GIT_REPOSITORY https://github.com/LTLA/byteme 20 | GIT_TAG v1.2.2 21 | ) 22 | 23 | FetchContent_MakeAvailable(byteme) 24 | FetchContent_MakeAvailable(rds2cpp) 25 | 26 | # Defining the targets. 
27 | set(TARGET rds2py) 28 | 29 | find_package(pybind11 CONFIG) 30 | 31 | # pybind11 method: 32 | pybind11_add_module(${TARGET} 33 | src/rdswrapper.cpp 34 | ) 35 | 36 | set_property(TARGET ${TARGET} PROPERTY CXX_STANDARD 17) 37 | 38 | target_link_libraries(${TARGET} PRIVATE rds2cpp pybind11::pybind11) 39 | 40 | set_target_properties(${TARGET} PROPERTIES 41 | OUTPUT_NAME lib_rds_parser 42 | PREFIX "" 43 | ) 44 | -------------------------------------------------------------------------------- /lib/src/rdswrapper.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace py = pybind11; 9 | 10 | class RdsReader { 11 | private: 12 | const rds2cpp::RObject* ptr; 13 | 14 | public: 15 | RdsReader(const rds2cpp::RObject* p) : ptr(p) { 16 | if (!p) throw std::runtime_error("Null pointer passed to 'RdsReader'."); 17 | } 18 | 19 | std::string get_rtype() const { 20 | if (!ptr) throw std::runtime_error("Null pointer in 'get_rtype'."); 21 | // py::print("arg::", static_cast(ptr->type())); 22 | switch (ptr->type()) { 23 | case rds2cpp::SEXPType::S4: return "S4"; 24 | case rds2cpp::SEXPType::INT: return "integer"; 25 | case rds2cpp::SEXPType::REAL: return "double"; 26 | case rds2cpp::SEXPType::STR: return "string"; 27 | case rds2cpp::SEXPType::LGL: return "boolean"; 28 | case rds2cpp::SEXPType::VEC: return "vector"; 29 | case rds2cpp::SEXPType::NIL: return "null"; 30 | default: return "other"; 31 | } 32 | } 33 | 34 | int get_rsize() const { 35 | if (!ptr) throw std::runtime_error("Null pointer in 'get_rsize'."); 36 | switch (ptr->type()) { 37 | case rds2cpp::SEXPType::INT: return static_cast(ptr)->data.size(); 38 | case rds2cpp::SEXPType::REAL: return static_cast(ptr)->data.size(); 39 | case rds2cpp::SEXPType::STR: return static_cast(ptr)->data.size(); 40 | case rds2cpp::SEXPType::LGL: return static_cast(ptr)->data.size(); 41 | case rds2cpp::SEXPType::VEC: 
return static_cast(ptr)->data.size(); 42 | default: return -1; 43 | } 44 | } 45 | 46 | py::array get_numeric_data() const { 47 | if (!ptr) throw std::runtime_error("Null pointer in 'get_numeric_data'."); 48 | switch (ptr->type()) { 49 | case rds2cpp::SEXPType::INT: { 50 | const auto& data = static_cast(ptr)->data; 51 | return py::array_t({data.size()}, {sizeof(int32_t)}, data.data()); 52 | } 53 | case rds2cpp::SEXPType::LGL: { 54 | const auto& data = static_cast(ptr)->data; 55 | return py::array_t({data.size()}, {sizeof(int32_t)}, data.data()); 56 | } 57 | case rds2cpp::SEXPType::REAL: { 58 | const auto& data = static_cast(ptr)->data; 59 | return py::array_t({data.size()}, {sizeof(double)}, data.data()); 60 | } 61 | default: 62 | throw std::runtime_error("Invalid type for numeric data"); 63 | } 64 | } 65 | 66 | py::list get_string_arr() const { 67 | if (!ptr) throw std::runtime_error("Null pointer in 'get_string_arr'."); 68 | if (ptr->type() != rds2cpp::SEXPType::STR) { 69 | throw std::runtime_error("Invalid type for 'string_arr'"); 70 | } 71 | const auto& data = static_cast(ptr)->data; 72 | return py::cast(data); 73 | } 74 | 75 | py::list get_attribute_names() const { 76 | if (!ptr) throw std::runtime_error("Null pointer in 'get_attribute_names'"); 77 | return py::cast(get_attributes().names); 78 | } 79 | 80 | py::object load_attribute_by_name(const std::string& name) const { 81 | if (!ptr) throw std::runtime_error("Null pointer in 'load_attribute_by_name'"); 82 | const auto& attributes = get_attributes(); 83 | auto it = std::find(attributes.names.begin(), attributes.names.end(), name); 84 | if (it == attributes.names.end()) { 85 | throw std::runtime_error("Attribute not found: " + name); 86 | } 87 | size_t index = std::distance(attributes.names.begin(), it); 88 | return py::cast(new RdsReader(attributes.values[index].get())); 89 | } 90 | 91 | py::object load_vec_element(int index) const { 92 | if (!ptr) throw std::runtime_error("Null pointer in 
'load_vec_element'"); 93 | if (ptr->type() != rds2cpp::SEXPType::VEC) { 94 | throw std::runtime_error("Not a vector type"); 95 | } 96 | const auto& data = static_cast(ptr)->data; 97 | if (index < 0 || static_cast(index) >= data.size()) { 98 | throw std::out_of_range("Vector index out of range"); 99 | } 100 | return py::cast(new RdsReader(data[index].get())); 101 | } 102 | 103 | std::string get_package_name() const { 104 | if (!ptr || ptr->type() != rds2cpp::SEXPType::S4) { 105 | throw std::runtime_error("Not an S4 object"); 106 | } 107 | return static_cast(ptr)->package_name; 108 | } 109 | 110 | std::string get_class_name() const { 111 | if (!ptr || ptr->type() != rds2cpp::SEXPType::S4) { 112 | throw std::runtime_error("Not an S4 object"); 113 | } 114 | return static_cast(ptr)->class_name; 115 | } 116 | 117 | std::pair get_dimensions() const { 118 | if (!ptr || ptr->type() != rds2cpp::SEXPType::INT) { 119 | throw std::runtime_error("Dimensions must be integer"); 120 | } 121 | const auto& dims = static_cast(ptr)->data; 122 | if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) { 123 | throw std::runtime_error("Invalid dimensions"); 124 | } 125 | return {static_cast(dims[0]), static_cast(dims[1])}; 126 | } 127 | 128 | private: 129 | const rds2cpp::Attributes& get_attributes() const { 130 | if (!ptr) throw std::runtime_error("Null pointer in get_attributes"); 131 | switch (ptr->type()) { 132 | case rds2cpp::SEXPType::INT: return static_cast(ptr)->attributes; 133 | case rds2cpp::SEXPType::REAL: return static_cast(ptr)->attributes; 134 | case rds2cpp::SEXPType::LGL: return static_cast(ptr)->attributes; 135 | case rds2cpp::SEXPType::STR: return static_cast(ptr)->attributes; 136 | case rds2cpp::SEXPType::VEC: return static_cast(ptr)->attributes; 137 | case rds2cpp::SEXPType::S4: return static_cast(ptr)->attributes; 138 | default: throw std::runtime_error("Unsupported type for attributes"); 139 | } 140 | } 141 | }; 142 | 143 | class RdsObject { 144 | private: 145 | 
std::unique_ptr parsed; 146 | std::unique_ptr reader; 147 | 148 | public: 149 | RdsObject(const std::string& file) { 150 | try { 151 | parsed = std::make_unique(rds2cpp::parse_rds(file)); 152 | if (!parsed || !parsed->object) { 153 | throw std::runtime_error("Failed to parse RDS file"); 154 | } 155 | reader = std::make_unique(parsed->object.get()); 156 | } catch (const std::exception& e) { 157 | throw std::runtime_error(std::string("Error in 'RdsObject' constructor: ") + e.what()); 158 | } 159 | } 160 | 161 | RdsReader* get_robject() const { 162 | if (!reader) throw std::runtime_error("Null reader in 'get_robject'"); 163 | return reader.get(); 164 | } 165 | }; 166 | 167 | PYBIND11_MODULE(lib_rds_parser, m) { 168 | py::register_exception(m, "RdsParserError"); 169 | 170 | py::class_(m, "RdsObject") 171 | .def(py::init()) 172 | .def("get_robject", &RdsObject::get_robject, py::return_value_policy::reference_internal); 173 | 174 | py::class_(m, "RdsReader") 175 | .def(py::init()) 176 | .def("get_rtype", &RdsReader::get_rtype) 177 | .def("get_rsize", &RdsReader::get_rsize) 178 | .def("get_numeric_data", &RdsReader::get_numeric_data) 179 | .def("get_string_arr", &RdsReader::get_string_arr) 180 | .def("get_attribute_names", &RdsReader::get_attribute_names) 181 | .def("load_attribute_by_name", &RdsReader::load_attribute_by_name) 182 | .def("load_vec_element", &RdsReader::load_vec_element) 183 | .def("get_package_name", &RdsReader::get_package_name) 184 | .def("get_class_name", &RdsReader::get_class_name) 185 | .def("get_dimensions", &RdsReader::get_dimensions); 186 | } 187 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # AVOID CHANGING REQUIRES: IT WILL BE UPDATED BY PYSCAFFOLD! 
3 | requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "cmake", "pybind11", "numpy"] 4 | build-backend = "setuptools.build_meta" 5 | 6 | [tool.setuptools_scm] 7 | # See configuration details in https://github.com/pypa/setuptools_scm 8 | version_scheme = "no-guess-dev" 9 | 10 | [tool.ruff] 11 | line-length = 120 12 | src = ["src"] 13 | exclude = ["tests"] 14 | extend-ignore = ["F821"] 15 | 16 | [tool.ruff.pydocstyle] 17 | convention = "google" 18 | 19 | [tool.ruff.per-file-ignores] 20 | "__init__.py" = ["E402", "F401"] 21 | 22 | [tool.black] 23 | force-exclude = "__init__.py" 24 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # This file is used to configure your project. 2 | # Read more about the various options under: 3 | # https://setuptools.pypa.io/en/latest/userguide/declarative_config.html 4 | # https://setuptools.pypa.io/en/latest/references/keywords.html 5 | 6 | [metadata] 7 | name = rds2py 8 | description = Parse and construct Python representations for datasets stored in RDS files 9 | author = jkanche 10 | author_email = jayaram.kancherla@gmail.com 11 | license = MIT 12 | license_files = LICENSE.txt 13 | long_description = file: README.md 14 | long_description_content_type = text/markdown; charset=UTF-8; variant=GFM 15 | url = https://github.com/biocpy/rds2py 16 | # Add here related links, for example: 17 | project_urls = 18 | Documentation = https://biocpy.github.io/rds2py/ 19 | Source = https://github.com/biocpy/rds2py 20 | # Changelog = https://pyscaffold.org/en/latest/changelog.html 21 | # Tracker = https://github.com/pyscaffold/pyscaffold/issues 22 | # Conda-Forge = https://anaconda.org/conda-forge/pyscaffold 23 | # Download = https://pypi.org/project/PyScaffold/#files 24 | # Twitter = https://twitter.com/PyScaffold 25 | 26 | # Change if running only on Windows, Mac or Linux (comma-separated) 27 | platforms = 
Mac, Linux 28 | 29 | # Add here all kinds of additional classifiers as defined under 30 | # https://pypi.org/classifiers/ 31 | classifiers = 32 | Development Status :: 4 - Beta 33 | Programming Language :: Python 34 | 35 | 36 | [options] 37 | zip_safe = False 38 | packages = find_namespace: 39 | include_package_data = True 40 | package_dir = 41 | =src 42 | 43 | # Require a min/specific Python version (comma-separated conditions) 44 | python_requires = >=3.9 45 | 46 | # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. 47 | # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in 48 | # new major versions. This works if the required packages follow Semantic Versioning. 49 | # For more information, check out https://semver.org/. 50 | install_requires = 51 | importlib-metadata; python_version<"3.8" 52 | numpy 53 | biocutils>=0.1.5 54 | 55 | [options.packages.find] 56 | where = src 57 | exclude = 58 | tests 59 | 60 | [options.extras_require] 61 | # Add here additional requirements for extra features, to install with: 62 | # `pip install rds2py[PDF]` like: 63 | # PDF = ReportLab; RXP 64 | optional = 65 | pandas 66 | hdf5array 67 | scipy 68 | biocframe 69 | genomicranges>=0.4.9 70 | summarizedexperiment>=0.4.1 71 | singlecellexperiment>=0.4.1 72 | multiassayexperiment 73 | 74 | # Add here test requirements (semicolon/line-separated) 75 | testing = 76 | setuptools 77 | pytest 78 | pytest-cov 79 | %(optional)s 80 | 81 | [options.entry_points] 82 | # Add here console scripts like: 83 | # console_scripts = 84 | # script_name = rds2py.module:function 85 | # For example: 86 | # console_scripts = 87 | # fibonacci = rds2py.skeleton:run 88 | # And any other entry points, for example: 89 | # pyscaffold.cli = 90 | # awesome = pyscaffoldext.awesome.extension:AwesomeExtension 91 | 92 | [tool:pytest] 93 | # Specify command line options as you would do when invoking pytest directly. 94 | # e.g. 
--cov-report html (or xml) for html/xml output or --junitxml junit.xml 95 | # in order to write a coverage file that can be read by Jenkins. 96 | # CAUTION: --cov flags may prohibit setting breakpoints while debugging. 97 | # Comment those flags to avoid this pytest issue. 98 | addopts = 99 | --cov rds2py --cov-report term-missing 100 | --verbose 101 | norecursedirs = 102 | dist 103 | build 104 | .tox 105 | testpaths = tests 106 | # Use pytest markers to select/deselect specific tests 107 | # markers = 108 | # slow: mark tests as slow (deselect with '-m "not slow"') 109 | # system: mark end-to-end system tests 110 | 111 | [devpi:upload] 112 | # Options for the devpi: PyPI server and packaging tool 113 | # VCS export must be deactivated since we are using setuptools-scm 114 | no_vcs = 1 115 | formats = bdist_wheel 116 | 117 | [flake8] 118 | # Some sane defaults for the code style checker flake8 119 | max_line_length = 100 120 | extend_ignore = E203, W503 121 | # ^ Black-compatible 122 | # E203 and W503 have edge cases handled by black 123 | exclude = 124 | .tox 125 | build 126 | dist 127 | .eggs 128 | docs/conf.py 129 | per-file-ignores = __init__.py:F401 130 | 131 | [pyscaffold] 132 | # PyScaffold's parameters when the project was created. 133 | # This will be used when updating. Do not change! 134 | version = 4.5 135 | package = rds2py 136 | extensions = 137 | markdown 138 | pre_commit 139 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup file for rds2py. Use setup.cfg to configure your project. 2 | 3 | This file was generated with PyScaffold 4.5. 4 | PyScaffold helps you to put up the scaffold of your new Python project. 
5 | Learn more under: https://pyscaffold.org/ 6 | """ 7 | 8 | from setuptools import setup, Extension 9 | from setuptools.command.build_ext import build_ext as build_ext_orig 10 | import pathlib 11 | import os 12 | import shutil 13 | import sys 14 | import pybind11 15 | 16 | 17 | ### Adapted from dolomite_base: https://github.com/ArtifactDB/dolomite-base/blob/master/setup.py 18 | ## Adapted from https://stackoverflow.com/questions/42585210/extending-setuptools-extension-to-use-cmake-in-setup-py. 19 | class CMakeExtension(Extension): 20 | def __init__(self, name): 21 | super().__init__(name, sources=[]) 22 | 23 | 24 | class build_ext(build_ext_orig): 25 | def run(self): 26 | for ext in self.extensions: 27 | self.build_cmake(ext) 28 | 29 | def build_cmake(self, ext): 30 | build_temp = pathlib.Path(self.build_temp) 31 | build_lib = pathlib.Path(self.build_lib) 32 | outpath = os.path.join(build_lib.absolute(), ext.name) 33 | 34 | if not os.path.exists(build_temp): 35 | cmd = [ 36 | "cmake", 37 | "-S", 38 | "lib", 39 | "-B", 40 | build_temp, 41 | "-Dpybind11_DIR=" + os.path.join(os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11"), 42 | "-DPYTHON_EXECUTABLE=" + sys.executable, 43 | ] 44 | if os.name != "nt": 45 | cmd.append("-DCMAKE_BUILD_TYPE=Release") 46 | cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath) 47 | 48 | if "MORE_CMAKE_OPTIONS" in os.environ: 49 | cmd += os.environ["MORE_CMAKE_OPTIONS"].split() 50 | self.spawn(cmd) 51 | 52 | if not self.dry_run: 53 | cmd = ["cmake", "--build", build_temp] 54 | if os.name == "nt": 55 | cmd += ["--config", "Release"] 56 | self.spawn(cmd) 57 | if os.name == "nt": 58 | # Gave up trying to get MSVC to respect the output directory. 59 | # Delvewheel also needs it to have a 'pyd' suffix... whatever. 
60 | shutil.copyfile( 61 | os.path.join(build_temp, "Release", "_core.dll"), 62 | os.path.join(outpath, "_core.pyd"), 63 | ) 64 | 65 | 66 | if __name__ == "__main__": 67 | try: 68 | setup( 69 | use_scm_version={"version_scheme": "no-guess-dev"}, 70 | ext_modules=[CMakeExtension("rds2py")], 71 | cmdclass={"build_ext": build_ext}, 72 | ) 73 | except: # noqa 74 | print( 75 | "\n\nAn error occurred while building the project, " 76 | "please ensure you have the most updated version of setuptools, " 77 | "setuptools_scm and wheel with:\n" 78 | " pip install -U setuptools setuptools_scm wheel\n\n" 79 | ) 80 | raise 81 | -------------------------------------------------------------------------------- /src/rds2py/PyRdsReader.py: -------------------------------------------------------------------------------- 1 | """Low-level interface for reading RDS file format. 2 | 3 | This module provides the core functionality for parsing RDS files at a binary level and converting them into a 4 | dictionary representation that can be further processed by higher-level functions. 5 | """ 6 | 7 | from typing import Any, Dict, List, Union 8 | from warnings import warn 9 | 10 | import numpy as np 11 | 12 | from .lib_rds_parser import RdsObject, RdsReader 13 | 14 | 15 | class PyRdsParserError(Exception): 16 | """Exception raised for errors during RDS parsing.""" 17 | 18 | pass 19 | 20 | 21 | class PyRdsParser: 22 | """Parser for reading RDS files. 23 | 24 | This class provides low-level access to RDS file contents, handling the binary 25 | format and converting it into Python data structures. It supports various R 26 | data types and handles special R cases like NA values, integer sequences and 27 | range functions. 28 | 29 | Attributes: 30 | R_MIN: 31 | Minimum integer value in R, used for handling NA values. 32 | 33 | rds_object: 34 | Internal representation of the RDS file. 35 | 36 | root_object: 37 | Root object of the parsed RDS file. 
38 | """ 39 | 40 | R_MIN: int = -2147483648 41 | 42 | def __init__(self, file_path: str): 43 | """Initialize the class. 44 | 45 | Args: 46 | file_path: 47 | Path to the RDS file to be read. 48 | """ 49 | try: 50 | self.rds_object = RdsObject(file_path) 51 | robject = self.rds_object.get_robject() 52 | 53 | if not isinstance(robject, RdsReader): 54 | raise TypeError(f"Expected 'RdsReader' object, got {type(robject)}") 55 | 56 | self.root_object = robject 57 | except Exception as e: 58 | raise PyRdsParserError(f"Error initializing 'PyRdsParser': {str(e)}") 59 | 60 | def parse(self) -> Dict[str, Any]: 61 | """Parse the entire RDS file into a dictionary structure. 62 | 63 | Returns: 64 | A dictionary containing the parsed data with keys: 65 | - 'type': The R object type 66 | - 'data': The actual data (if applicable) 67 | - 'attributes': R object attributes (if any) 68 | - 'class_name': The R class name 69 | - Additional keys depending on the object type 70 | 71 | Raises: 72 | PyRdsParserError: If there's an error during parsing. 
73 | """ 74 | try: 75 | return self._process_object(self.root_object) 76 | except Exception as e: 77 | raise PyRdsParserError(f"Error parsing RDS object: {str(e)}") 78 | 79 | def _process_object(self, obj: RdsReader) -> Dict[str, Any]: 80 | try: 81 | rtype = obj.get_rtype() 82 | result: Dict[str, Any] = {"type": rtype} 83 | 84 | if rtype == "S4": 85 | result["package_name"] = obj.get_package_name() 86 | result["class_name"] = obj.get_class_name() 87 | result["attributes"] = self._process_attributes(obj) 88 | elif rtype in ["integer", "boolean", "double"]: 89 | result["data"] = self._handle_r_special_cases( 90 | self._get_numeric_data(obj, rtype), rtype, obj.get_rsize() 91 | ) 92 | result["attributes"] = self._process_attributes(obj) 93 | result["class_name"] = f"{rtype}_vector" 94 | elif rtype == "string": 95 | result["data"] = obj.get_string_arr() 96 | result["class_name"] = "string_vector" 97 | elif rtype == "vector": 98 | result["data"] = self._process_vector(obj) 99 | result["attributes"] = self._process_attributes(obj) 100 | result["class_name"] = "vector" 101 | elif rtype == "null": 102 | pass 103 | else: 104 | # raise ValueError 105 | warn(f"Unsupported R object type: {rtype}", RuntimeWarning) 106 | result["data"] = None 107 | result["attributes"] = None 108 | result["class_name"] = None 109 | 110 | return result 111 | except Exception as e: 112 | raise PyRdsParserError(f"Error processing object: {str(e)}") 113 | 114 | def _handle_r_special_cases(self, data: np.ndarray, rtype: str, size: int) -> Union[np.ndarray, range]: 115 | """Handle special R data representations.""" 116 | try: 117 | # Special handling for R integer containing NA 118 | if size != 2: 119 | if any(data == self.R_MIN): 120 | return np.array([np.nan if x == self.R_MIN else x for x in data]) 121 | 122 | # Special handling for R integer sequences 123 | if rtype == "integer" and size == 2 and data[0] == self.R_MIN and data[1] < 0: 124 | if data[1] == self.R_MIN: 125 | return [None, None] 126 | 
return range(data[1] * -1) 127 | 128 | return data 129 | except Exception as e: 130 | raise PyRdsParserError(f"Error handling R special cases: {str(e)}") 131 | 132 | def _get_numeric_data(self, obj: RdsReader, rtype: str) -> np.ndarray: 133 | try: 134 | data = obj.get_numeric_data() 135 | if rtype == "boolean": 136 | return data.astype(bool) 137 | 138 | return data 139 | except Exception as e: 140 | raise PyRdsParserError(f"Error getting numeric data: {str(e)}") 141 | 142 | def _process_vector(self, obj: RdsReader) -> List[Dict[str, Any]]: 143 | return [self._process_object(obj.load_vec_element(i)) for i in range(obj.get_rsize())] 144 | 145 | def _process_attributes(self, obj: RdsReader) -> Dict[str, Dict[str, Any]]: 146 | try: 147 | attributes = {} 148 | for name in obj.get_attribute_names(): 149 | attr_obj = obj.load_attribute_by_name(name) 150 | attributes[name] = self._process_object(attr_obj) 151 | 152 | return attributes 153 | except Exception as e: 154 | raise PyRdsParserError(f"Error processing attributes: {str(e)}") 155 | 156 | def get_dimensions(self) -> Union[tuple, None]: 157 | try: 158 | return self.root_object.get_dimensions() 159 | except Exception as e: 160 | raise PyRdsParserError(f"Error getting dimensions: {str(e)}") 161 | -------------------------------------------------------------------------------- /src/rds2py/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if sys.version_info[:2] >= (3, 8): 4 | # TODO: Import directly (no need for conditional) when `python_requires = >= 3.8` 5 | from importlib.metadata import PackageNotFoundError, version # pragma: no cover 6 | else: 7 | from importlib_metadata import PackageNotFoundError, version # pragma: no cover 8 | 9 | try: 10 | # Change here if project is renamed and does not equal the package name 11 | dist_name = __name__ 12 | __version__ = version(dist_name) 13 | except PackageNotFoundError: # pragma: no cover 14 | __version__ = 
"unknown" 15 | finally: 16 | del version, PackageNotFoundError 17 | 18 | from .generics import read_rds 19 | from .rdsutils import parse_rds 20 | -------------------------------------------------------------------------------- /src/rds2py/generics.py: -------------------------------------------------------------------------------- 1 | """Core functionality for reading RDS files in Python. 2 | 3 | This module provides the main interface for reading RDS files and converting them 4 | to appropriate Python objects. It maintains a registry of supported R object types 5 | and their corresponding Python parser functions. 6 | 7 | The module supports various R object types including vectors, matrices, data frames, 8 | and specialized Bioconductor objects like GenomicRanges and SummarizedExperiment. 9 | 10 | Example: 11 | 12 | .. code-block:: python 13 | 14 | data = read_rds("example.rds") 15 | print(type(data)) 16 | """ 17 | 18 | from importlib import import_module 19 | from warnings import warn 20 | 21 | from .rdsutils import get_class, parse_rds 22 | 23 | __author__ = "jkanche" 24 | __copyright__ = "jkanche" 25 | __license__ = "MIT" 26 | 27 | REGISTRY = { 28 | # typed vectors 29 | "integer_vector": "rds2py.read_atomic.read_integer_vector", 30 | "boolean_vector": "rds2py.read_atomic.read_boolean_vector", 31 | "string_vector": "rds2py.read_atomic.read_string_vector", 32 | "double_vector": "rds2py.read_atomic.read_double_vector", 33 | # dictionary 34 | "vector": "rds2py.read_dict.read_dict", 35 | # factors 36 | "factor": "rds2py.read_factor.read_factor", 37 | # Rle 38 | "Rle": "rds2py.read_rle.read_rle", 39 | # matrices 40 | "dgCMatrix": "rds2py.read_matrix.read_dgcmatrix", 41 | "dgRMatrix": "rds2py.read_matrix.read_dgrmatrix", 42 | "dgTMatrix": "rds2py.read_matrix.read_dgtmatrix", 43 | "ndarray": "rds2py.read_matrix.read_ndarray", 44 | # data frames 45 | "data.frame": "rds2py.read_frame.read_data_frame", 46 | "DFrame": "rds2py.read_frame.read_dframe", 47 | # genomic ranges 
48 | "GRanges": "rds2py.read_granges.read_genomic_ranges", 49 | "GenomicRanges": "rds2py.read_granges.read_genomic_ranges", 50 | "CompressedGRangesList": "rds2py.read_granges.read_granges_list", 51 | "GRangesList": "rds2py.read_granges.read_granges_list", 52 | # summarized experiment 53 | "SummarizedExperiment": "rds2py.read_se.read_summarized_experiment", 54 | "RangedSummarizedExperiment": "rds2py.read_se.read_ranged_summarized_experiment", 55 | # single-cell experiment 56 | "SingleCellExperiment": "rds2py.read_sce.read_single_cell_experiment", 57 | "SummarizedExperimentByColumn": "rds2py.read_sce.read_alts_summarized_experiment_by_column", 58 | # multi assay experiment 59 | "MultiAssayExperiment": "rds2py.read_mae.read_multi_assay_experiment", 60 | "ExperimentList": "rds2py.read_dict.read_dict", 61 | # delayed matrices 62 | "H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse", 63 | } 64 | 65 | 66 | # @singledispatch 67 | # def save_rds(x, path: str): 68 | # """Save a Python object as RDS file. 69 | 70 | # Args: 71 | # x: 72 | # Object to save. 73 | 74 | # path: 75 | # Path to save the object. 76 | # """ 77 | # raise NotImplementedError( 78 | # f"No `save_rds` method implemented for '{type(x).__name__}' objects." 79 | # ) 80 | 81 | 82 | def read_rds(path: str, **kwargs): 83 | """Read an RDS file and convert it to an appropriate Python object. 84 | 85 | Args: 86 | path: 87 | Path to the RDS file to be read. 88 | 89 | **kwargs: 90 | Additional arguments passed to specific parser functions. 91 | 92 | Returns: 93 | A Python object representing the data in the RDS file. The exact type 94 | depends on the contents of the RDS file and the available parsers. 95 | """ 96 | _robj = parse_rds(path=path) 97 | return _dispatcher(_robj, **kwargs) 98 | 99 | 100 | def _dispatcher(robject: dict, **kwargs): 101 | """Internal function to dispatch R objects to appropriate parser functions. 102 | 103 | Args: 104 | robject: 105 | Dictionary containing parsed R object data. 
106 | 107 | **kwargs: 108 | Additional arguments passed to specific parser functions. 109 | 110 | Returns: 111 | Parsed Python object corresponding to the R data structure. 112 | Returns the original dictionary if no appropriate parser is found. 113 | """ 114 | _class_name = get_class(robject) 115 | 116 | if _class_name is None: 117 | return None 118 | 119 | # if a class is registered, coerce the object 120 | # to the representation. 121 | if _class_name in REGISTRY: 122 | try: 123 | command = REGISTRY[_class_name] 124 | if isinstance(command, str): 125 | last_period = command.rfind(".") 126 | mod = import_module(command[:last_period]) 127 | command = getattr(mod, command[last_period + 1 :]) 128 | REGISTRY[_class_name] = command 129 | 130 | return command(robject, **kwargs) 131 | except Exception as e: 132 | warn( 133 | f"Failed to coerce RDS object to class: '{_class_name}', returning the dictionary, {str(e)}", 134 | RuntimeWarning, 135 | ) 136 | else: 137 | warn( 138 | f"RDS file contains an unknown class: '{_class_name}', returning the dictionary", 139 | RuntimeWarning, 140 | ) 141 | 142 | return robject 143 | -------------------------------------------------------------------------------- /src/rds2py/rdsutils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for RDS file parsing and class inference. 2 | 3 | This module provides helper functions for parsing RDS files and inferring the appropriate R class information from 4 | parsed objects. 5 | """ 6 | 7 | from .PyRdsReader import PyRdsParser 8 | 9 | __author__ = "jkanche" 10 | __copyright__ = "jkanche" 11 | __license__ = "MIT" 12 | 13 | 14 | def parse_rds(path: str) -> dict: 15 | """Parse an RDS file into a dictionary representation. 16 | 17 | Args: 18 | path: 19 | Path to the RDS file to be parsed. 20 | 21 | Returns: 22 | A dictionary containing the parsed contents of the RDS file. 23 | The structure depends on the type of R object stored in the file. 
24 | """ 25 | parsed_obj = PyRdsParser(path) 26 | realized = parsed_obj.parse() 27 | 28 | return realized 29 | 30 | 31 | def get_class(robj: dict) -> str: 32 | """Infer the R class name from a parsed RDS object. 33 | 34 | Notes: 35 | - Handles both S4 and non-S4 R objects 36 | - Special handling for vectors and matrices 37 | - Checks for class information in object attributes 38 | 39 | Args: 40 | robj: 41 | Dictionary containing parsed RDS data, typically 42 | the output of :py:func:`~.parse_rds`. 43 | 44 | Returns: 45 | The inferred R class name, or None if no class can be determined. 46 | """ 47 | _inferred_cls_name = None 48 | if robj["type"] != "S4": 49 | if "class_name" in robj: 50 | _inferred_cls_name = robj["class_name"] 51 | if _inferred_cls_name is not None and ( 52 | "integer" in _inferred_cls_name or "double" in _inferred_cls_name or _inferred_cls_name == "vector" 53 | ): 54 | if "attributes" in robj: 55 | obj_attr = robj["attributes"] 56 | 57 | # kind of making this assumption, if we ever see a dim, its a matrix 58 | if obj_attr is not None: 59 | if "dim" in obj_attr: 60 | _inferred_cls_name = "ndarray" 61 | elif "class" in obj_attr: 62 | _inferred_cls_name = obj_attr["class"]["data"][0] 63 | 64 | else: 65 | _inferred_cls_name = robj["class_name"] 66 | 67 | return _inferred_cls_name 68 | -------------------------------------------------------------------------------- /src/rds2py/read_atomic.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing atomic R vector types into Python objects. 2 | 3 | This module provides parser functions for converting R's atomic vector types (boolean, integer, string, and double) into 4 | appropriate Python objects using the biocutils package's specialized list classes. 
5 | """ 6 | 7 | from biocutils import BooleanList, FloatList, IntegerList, StringList 8 | 9 | from .generics import _dispatcher 10 | 11 | __author__ = "jkanche" 12 | __copyright__ = "jkanche" 13 | __license__ = "MIT" 14 | 15 | 16 | def _extract_names(robject: dict, **kwargs): 17 | """Extract names attribute from an R object if present. 18 | 19 | Args: 20 | robject: 21 | Dictionary containing parsed R object data. 22 | 23 | **kwargs: 24 | Additional arguments. 25 | 26 | Returns: 27 | List of names if present in the object's attributes, 28 | None otherwise. 29 | """ 30 | _names = None 31 | if "attributes" in robject and robject["attributes"] is not None: 32 | if "names" in robject["attributes"]: 33 | _names = _dispatcher(robject["attributes"]["names"]) 34 | 35 | return _names 36 | 37 | 38 | def read_boolean_vector(robject: dict, **kwargs) -> BooleanList: 39 | """Convert an R boolean vector to a Python :py:class:`~biocutils.BooleanList`. 40 | 41 | Args: 42 | robject: 43 | Dictionary containing parsed R boolean vector data. 44 | 45 | **kwargs: 46 | Additional arguments. 47 | 48 | Returns: 49 | A `BooleanList` object containing the vector data 50 | and any associated names. 51 | """ 52 | _names = _extract_names(robject, **kwargs) 53 | 54 | obj = BooleanList(robject["data"], names=_names) 55 | return obj 56 | 57 | 58 | def read_integer_vector(robject: dict, **kwargs) -> IntegerList: 59 | """Convert an R integer vector to a Python :py:class:`~biocutils.IntegerList`. 60 | 61 | Args: 62 | robject: 63 | Dictionary containing parsed R integer vector data. 64 | 65 | **kwargs: 66 | Additional arguments. 67 | 68 | Returns: 69 | A `IntegerList` object containing the vector data 70 | and any associated names. 
71 | """ 72 | _names = _extract_names(robject, **kwargs) 73 | 74 | obj = IntegerList(robject["data"], names=_names) 75 | return obj 76 | 77 | 78 | def read_string_vector(robject: dict, **kwargs) -> StringList: 79 | """Convert an R string vector to a Python :py:class:`~biocutils.StringList`. 80 | 81 | Args: 82 | robject: 83 | Dictionary containing parsed R string vector data. 84 | 85 | **kwargs: 86 | Additional arguments. 87 | 88 | Returns: 89 | A `StringList` object containing the vector data 90 | and any associated names. 91 | """ 92 | _names = _extract_names(robject, **kwargs) 93 | 94 | obj = StringList(robject["data"], names=_names) 95 | return obj 96 | 97 | 98 | def read_double_vector(robject: dict, **kwargs) -> FloatList: 99 | """Convert an R double vector to a Python :py:class:`~biocutils.FloatList`. 100 | 101 | Args: 102 | robject: 103 | Dictionary containing parsed R double vector data. 104 | 105 | **kwargs: 106 | Additional arguments. 107 | 108 | Returns: 109 | A `FloatList` object containing the vector data 110 | and any associated names. 111 | """ 112 | _names = _extract_names(robject, **kwargs) 113 | 114 | obj = FloatList(robject["data"], names=_names) 115 | return obj 116 | -------------------------------------------------------------------------------- /src/rds2py/read_delayed_matrix.py: -------------------------------------------------------------------------------- 1 | """Functions and classes for parsing R delayed matrix objects from HDF5Array.""" 2 | 3 | from .generics import _dispatcher 4 | from .rdsutils import get_class 5 | 6 | __author__ = "jkanche" 7 | __copyright__ = "jkanche" 8 | __license__ = "MIT" 9 | 10 | 11 | def read_hdf5_sparse(robject: dict, **kwargs): 12 | """Convert an R delayed sparse array (H5-backed). 13 | 14 | Args: 15 | robject: 16 | Dictionary containing parsed delayed sparse array. 17 | 18 | **kwargs: 19 | Additional arguments. 20 | 21 | Returns: 22 | A Hdf5CompressedSparseMatrix from the 'hdf5array' package. 
23 | """ 24 | _cls = get_class(robject) 25 | if _cls not in ["H5SparseMatrix"]: 26 | raise RuntimeError(f"`robject` does not contain a 'H5SparseMatrix' object, contains `{_cls}`.") 27 | 28 | by_column = False 29 | # get seed package name 30 | _seed_cls = get_class(robject["attributes"]["seed"]) 31 | if _seed_cls in ["CSC_H5SparseMatrixSeed"]: 32 | by_column = True 33 | 34 | _seed_obj = robject["attributes"]["seed"] 35 | shape = tuple(_dispatcher(_seed_obj["attributes"]["dim"], **kwargs)) 36 | fpath = list(_dispatcher(_seed_obj["attributes"]["filepath"], **kwargs))[0] 37 | group_name = list(_dispatcher(_seed_obj["attributes"]["group"], **kwargs))[0] 38 | 39 | from hdf5array import Hdf5CompressedSparseMatrix 40 | 41 | return Hdf5CompressedSparseMatrix(path=fpath, group_name=group_name, shape=shape, by_column=by_column) 42 | -------------------------------------------------------------------------------- /src/rds2py/read_dict.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing R vector and dictionary-like objects. 2 | 3 | This module provides functionality to convert R named vectors and list objects into Python dictionaries or lists, 4 | maintaining the structure and names of the original R objects. 5 | """ 6 | 7 | from .generics import _dispatcher 8 | from .rdsutils import get_class 9 | 10 | __author__ = "jkanche" 11 | __copyright__ = "jkanche" 12 | __license__ = "MIT" 13 | 14 | 15 | def read_dict(robject: dict, **kwargs) -> dict: 16 | """Convert an R named vector or list to a Python dictionary or list. 17 | 18 | Args: 19 | robject: 20 | Dictionary containing parsed R vector/list data. 21 | 22 | **kwargs: 23 | Additional arguments. 24 | 25 | Returns: 26 | If the R object has names, returns a dictionary mapping 27 | names to values. Otherwise, returns a list of parsed values. 
28 | 29 | Example: 30 | >>> # For a named R vector c(a=1, b=2) 31 | >>> result = read_dict(robject) 32 | >>> print(result) 33 | {'a': 1, 'b': 2} 34 | """ 35 | _cls = get_class(robject) 36 | 37 | if _cls not in ["vector"]: 38 | raise RuntimeError(f"`robject` does not contain a vector/dictionary object, contains `{_cls}`.") 39 | 40 | if "names" not in robject["attributes"]: 41 | return [_dispatcher(x, **kwargs) for x in robject["data"]] 42 | 43 | dict_keys = list(_dispatcher(robject["attributes"]["names"], **kwargs)) 44 | 45 | final_vec = {} 46 | for idx, dkey in enumerate(dict_keys): 47 | final_vec[dkey] = _dispatcher(robject["data"][idx], **kwargs) 48 | 49 | return final_vec 50 | -------------------------------------------------------------------------------- /src/rds2py/read_factor.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing R factor objects. 2 | 3 | This module handles the conversion of R factors (categorical variables) into Python lists, preserving the levels and 4 | maintaining the order of the factor levels. 5 | """ 6 | 7 | from .generics import _dispatcher 8 | from .rdsutils import get_class 9 | 10 | __author__ = "jkanche" 11 | __copyright__ = "jkanche" 12 | __license__ = "MIT" 13 | 14 | 15 | def read_factor(robject: dict, **kwargs) -> list: 16 | """Convert an R factor to a Python list. 17 | 18 | Args: 19 | robject: 20 | Dictionary containing parsed R factor data. 21 | 22 | **kwargs: 23 | Additional arguments. 24 | 25 | Returns: 26 | A list containing the factor values, with each value repeated 27 | according to its length if specified. 
28 | """ 29 | _cls = get_class(robject) 30 | 31 | if _cls not in ["factor"]: 32 | raise RuntimeError(f"`robject` does not contain a factor object, contains `{_cls}`.") 33 | 34 | data = robject["data"] 35 | 36 | # NA codes that the parser converted to NaN/None have no level; map them to None instead of failing on the index. 37 | if "levels" in robject["attributes"]: 38 | levels = _dispatcher(robject["attributes"]["levels"], **kwargs) 39 | level_vec = [None if x is None or x != x else levels[int(x) - 1] for x in data] 40 | 41 | if "lengths" in robject["attributes"]: 42 | lengths = _dispatcher(robject["attributes"]["lengths"], **kwargs) 43 | else: 44 | lengths = [1] * len(data) 45 | 46 | final_vec = [] 47 | for i, x in enumerate(lengths): 48 | final_vec.extend([level_vec[i]] * x) 49 | 50 | return final_vec 51 | -------------------------------------------------------------------------------- /src/rds2py/read_frame.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing R data frame objects. 2 | 3 | This module provides parsers for converting both base R `data.frame` objects 4 | and Bioconductor `DataFrame` objects into Python `BiocFrame` objects, preserving 5 | row names, column names, and data types. 6 | """ 7 | 8 | from .generics import _dispatcher 9 | from .rdsutils import get_class 10 | 11 | __author__ = "jkanche" 12 | __copyright__ = "jkanche" 13 | __license__ = "MIT" 14 | 15 | 16 | def read_data_frame(robject: dict, **kwargs): 17 | """Convert an R data.frame to a :py:class:`~biocframe.BiocFrame` object. 18 | 19 | Args: 20 | robject: 21 | Dictionary containing parsed R `data.frame` object. 22 | 23 | **kwargs: 24 | Additional arguments. 25 | 26 | Returns: 27 | A BiocFrame object containing the data frame's contents, 28 | with preserved column and row names. 
29 | """ 30 | cls = get_class(robject) 31 | 32 | if cls != "data.frame": 33 | raise RuntimeError("`robject` does not contain a 'data.frame'.") 34 | 35 | col_names = _dispatcher(robject["attributes"]["names"], **kwargs) 36 | 37 | bframe_obj = {} 38 | for idx, rd in enumerate(robject["data"]): 39 | bframe_obj[col_names[idx]] = _dispatcher(rd, **kwargs) 40 | 41 | from biocframe import BiocFrame 42 | 43 | df = BiocFrame( 44 | bframe_obj, 45 | row_names=_dispatcher(robject["attributes"]["row.names"], **kwargs), 46 | ) 47 | 48 | return df 49 | 50 | 51 | def read_dframe(robject: dict, **kwargs): 52 | """Convert an R DFrame (Bioconductor's `DataFrame`) to a `BiocFrame` object. 53 | 54 | Args: 55 | robject: 56 | Dictionary containing parsed R `DFrame` object. 57 | 58 | **kwargs: 59 | Additional arguments. 60 | 61 | Returns: 62 | A BiocFrame object containing the DataFrame's contents, 63 | with preserved metadata and structure. 64 | """ 65 | from biocframe import BiocFrame 66 | 67 | cls = get_class(robject) 68 | 69 | if cls != "DFrame": 70 | raise RuntimeError("`robject` does not contain a 'DFrame'.") 71 | 72 | data = {} 73 | col_names = _dispatcher(robject["attributes"]["listData"]["attributes"]["names"], **kwargs) 74 | for idx, colname in enumerate(col_names): 75 | data[colname] = _dispatcher(robject["attributes"]["listData"]["data"][idx], **kwargs) 76 | 77 | index = None 78 | if robject["attributes"]["rownames"]["data"]: 79 | index = _dispatcher(robject["attributes"]["rownames"], **kwargs) 80 | 81 | nrows = None 82 | if robject["attributes"]["nrows"]["data"]: 83 | nrows = list(_dispatcher(robject["attributes"]["nrows"], **kwargs))[0] # kwargs belong to the dispatcher; list() accepts no keyword arguments 84 | 85 | df = BiocFrame( 86 | data, 87 | # column_names=col_names, 88 | row_names=index, 89 | number_of_rows=nrows, 90 | ) 91 | 92 | return df 93 | -------------------------------------------------------------------------------- /src/rds2py/read_granges.py: -------------------------------------------------------------------------------- 1 | 
"""Functions for parsing Bioconductor GenomicRanges objects. 2 | 3 | This module provides parsers for converting Bioconductor's GenomicRanges and GenomicRangesList objects into their Python 4 | equivalents, preserving all genomic coordinates and associated metadata. 5 | """ 6 | 7 | from .generics import _dispatcher 8 | from .rdsutils import get_class 9 | 10 | __author__ = "jkanche" 11 | __copyright__ = "jkanche" 12 | __license__ = "MIT" 13 | 14 | 15 | def read_genomic_ranges(robject: dict, **kwargs): 16 | """Convert an R `GenomicRanges` object to a Python :py:class:`~genomicranges.GenomicRanges` object. 17 | 18 | Args: 19 | robject: 20 | Dictionary containing parsed `GenomicRanges` data. 21 | 22 | **kwargs: 23 | Additional arguments. 24 | 25 | Returns: 26 | A Python `GenomicRanges` object containing genomic intervals 27 | with associated annotations. 28 | """ 29 | 30 | from genomicranges import GenomicRanges, SeqInfo 31 | from iranges import IRanges 32 | 33 | _cls = get_class(robject) 34 | 35 | if _cls not in ["GenomicRanges", "GRanges"]: 36 | raise TypeError(f"obj is not 'GenomicRanges', but is `{_cls}`.") 37 | 38 | _range_start = _dispatcher(robject["attributes"]["ranges"]["attributes"]["start"], **kwargs) 39 | _range_width = _dispatcher(robject["attributes"]["ranges"]["attributes"]["width"], **kwargs) 40 | _range_names = None 41 | if "NAMES" in robject["attributes"]["ranges"]["attributes"]: 42 | _tmp_names = robject["attributes"]["ranges"]["attributes"]["NAMES"] 43 | _range_names = _dispatcher(_tmp_names, **kwargs) 44 | if _range_names is not None: 45 | _range_names = list(_range_names) 46 | 47 | _ranges = IRanges(_range_start, _range_width, names=_range_names) 48 | 49 | _strands = _dispatcher(robject["attributes"]["strand"], **kwargs) 50 | _seqnames = _dispatcher(robject["attributes"]["seqnames"], **kwargs) 51 | _seqinfo_seqnames = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["seqnames"], **kwargs) 52 | _seqinfo_seqlengths = 
_dispatcher(robject["attributes"]["seqinfo"]["attributes"]["seqlengths"], **kwargs) 53 | _seqinfo_is_circular = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["is_circular"], **kwargs) 54 | _seqinfo_genome = _dispatcher(robject["attributes"]["seqinfo"]["attributes"]["genome"], **kwargs) 55 | _seqinfo = SeqInfo( 56 | seqnames=_seqinfo_seqnames, 57 | seqlengths=_seqinfo_seqlengths, 58 | is_circular=_seqinfo_is_circular, 59 | genome=_seqinfo_genome, 60 | ) 61 | _mcols = _dispatcher(robject["attributes"]["elementMetadata"], **kwargs) 62 | 63 | _gr_names = None 64 | if "NAMES" in robject["attributes"]: 65 | _tmp_names = robject["attributes"]["NAMES"] 66 | _gr_names = None if _tmp_names is None else _dispatcher(_tmp_names, **kwargs) 67 | 68 | return GenomicRanges( 69 | seqnames=_seqnames, 70 | ranges=_ranges, 71 | strand=_strands, 72 | names=_gr_names, 73 | mcols=_mcols, 74 | seqinfo=_seqinfo, 75 | ) 76 | 77 | 78 | def read_granges_list(robject: dict, **kwargs): 79 | """Convert an R `GenomicRangesList` object to a Python :py:class:`~genomicranges.GenomicRangesList`. 80 | 81 | Args: 82 | robject: 83 | Dictionary containing parsed GenomicRangesList data. 84 | 85 | **kwargs: 86 | Additional arguments. 87 | 88 | Returns: 89 | A Python `GenomicRangesList` object containing multiple 90 | `GenomicRanges` objects. 
91 | """ 92 | 93 | from genomicranges import GenomicRangesList 94 | 95 | _cls = get_class(robject) 96 | 97 | if _cls not in ["CompressedGRangesList", "GRangesList"]: 98 | raise TypeError(f"`robject` is not genomic ranges list, but is `{_cls}`.") 99 | 100 | _gre = _dispatcher(robject["attributes"]["unlistData"], **kwargs) 101 | 102 | _groups = None 103 | if "NAMES" in robject["attributes"]["partitioning"]["attributes"]: 104 | _tmp_names = robject["attributes"]["partitioning"]["attributes"]["NAMES"] 105 | _groups = None if _tmp_names is None else _dispatcher(_tmp_names, **kwargs) 106 | 107 | _partitionends = _dispatcher(robject["attributes"]["partitioning"]["attributes"]["end"], **kwargs) 108 | 109 | _grelist = [] 110 | 111 | current = 0 112 | for _pend in _partitionends: 113 | _grelist.append(_gre[current:_pend]) 114 | current = _pend 115 | 116 | return GenomicRangesList(ranges=_grelist, names=_groups) 117 | -------------------------------------------------------------------------------- /src/rds2py/read_mae.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing Bioconductor MultiAssayExperiment objects. 2 | 3 | This module handles the conversion of Bioconductor's MultiAssayExperiment container format into its Python equivalent, 4 | preserving the complex relationships between multiple experimental assays and sample metadata. 5 | """ 6 | 7 | from .generics import _dispatcher 8 | from .rdsutils import get_class 9 | from .read_matrix import MatrixWrapper 10 | 11 | __author__ = "jkanche" 12 | __copyright__ = "jkanche" 13 | __license__ = "MIT" 14 | 15 | 16 | def _sanitize_expts(expts, **kwargs): 17 | """Convert raw experiment objects into SummarizedExperiment format. 18 | 19 | Args: 20 | expts: 21 | Dictionary of experiment objects. 22 | 23 | Returns: 24 | Dictionary of converted experiments, with matrix-like objects 25 | wrapped in SummarizedExperiment containers. 
26 | """ 27 | from biocframe import BiocFrame 28 | from summarizedexperiment import SummarizedExperiment 29 | 30 | res = {} 31 | for k, v in expts.items(): 32 | if isinstance(v, MatrixWrapper): 33 | res[k] = SummarizedExperiment( 34 | assays={"matrix": v.matrix}, 35 | row_data=BiocFrame(row_names=v.dimnames[0]), 36 | column_data=BiocFrame(row_names=v.dimnames[1]), 37 | ) 38 | else: 39 | res[k] = v 40 | 41 | return res 42 | 43 | 44 | def read_multi_assay_experiment(robject: dict, **kwargs): 45 | """Convert an R `MultiAssayExperiment` to a Python :py:class:`~multiassayexperiment.MultiAssayExperiment` object. 46 | 47 | Args: 48 | robject: 49 | Dictionary containing parsed MultiAssayExperiment data. 50 | 51 | **kwargs: 52 | Additional arguments. 53 | 54 | Returns: 55 | A Python `MultiAssayExperiment` object containing 56 | multiple experimental assays with associated metadata. 57 | """ 58 | 59 | _cls = get_class(robject) 60 | 61 | if _cls not in ["MultiAssayExperiment"]: 62 | raise RuntimeError(f"`robject` does not contain a 'MultiAssayExperiment' object, contains `{_cls}`.") 63 | 64 | # parse experiment names 65 | _expt_obj = robject["attributes"]["ExperimentList"]["attributes"]["listData"] 66 | robj_expts = _dispatcher(_expt_obj, **kwargs) 67 | 68 | # parse sample_map 69 | robj_samplemap = _dispatcher(robject["attributes"]["sampleMap"], **kwargs) 70 | 71 | # parse coldata 72 | robj_coldata = _dispatcher(robject["attributes"]["colData"], **kwargs) 73 | 74 | from multiassayexperiment import MultiAssayExperiment 75 | 76 | return MultiAssayExperiment( 77 | experiments=_sanitize_expts(robj_expts), 78 | sample_map=robj_samplemap, 79 | column_data=robj_coldata, 80 | ) 81 | -------------------------------------------------------------------------------- /src/rds2py/read_matrix.py: -------------------------------------------------------------------------------- 1 | """Functions and classes for parsing R matrix objects. 
2 | 3 | This module provides functionality to convert R matrix objects (both dense and sparse) into their Python equivalents 4 | using NumPy and SciPy sparse matrix formats. It handles various R matrix types including dgCMatrix, dgRMatrix, and 5 | dgTMatrix. 6 | """ 7 | 8 | from typing import Literal 9 | 10 | from numpy import ndarray 11 | 12 | from .generics import _dispatcher 13 | from .rdsutils import get_class 14 | 15 | __author__ = "jkanche" 16 | __copyright__ = "jkanche" 17 | __license__ = "MIT" 18 | 19 | 20 | class MatrixWrapper: 21 | """A simple wrapper class for matrices that preserves dimension names. 22 | 23 | This class bundles a matrix (dense or sparse) with its dimension names, 24 | maintaining the R-style naming of rows and columns. 25 | 26 | Attributes: 27 | matrix: 28 | The underlying matrix object (numpy.ndarray or scipy.sparse matrix). 29 | 30 | dimnames: 31 | A tuple of (row_names, column_names), each being a list of strings or None. 32 | """ 33 | 34 | def __init__(self, matrix, dimnames=None) -> None: 35 | self.matrix = matrix 36 | self.dimnames = dimnames 37 | 38 | @property 39 | def shape(self): 40 | return self.matrix.shape 41 | 42 | 43 | def _as_sparse_matrix(robject: dict, **kwargs): 44 | """Convert an R sparse matrix to a SciPy sparse matrix. 45 | 46 | Notes: 47 | - Supports dgCMatrix (column-sparse) 48 | - Supports dgRMatrix (row-sparse) 49 | - Supports dgTMatrix (triplet format) 50 | - Preserves dimension names if present 51 | 52 | Args: 53 | robject: 54 | Dictionary containing parsed R sparse matrix data. 55 | 56 | **kwargs: 57 | Additional arguments. 58 | 59 | Returns: 60 | A SciPy sparse matrix or wrapped matrix if dimension names exist. 
61 | """ 62 | 63 | from scipy.sparse import csc_matrix, csr_matrix 64 | 65 | _cls = get_class(robject) 66 | 67 | if _cls not in ["dgCMatrix", "dgRMatrix", "dgTMatrix"]: 68 | raise RuntimeError(f"`robject` does not contain not a supported sparse matrix format, contains `{_cls}`.") 69 | 70 | if _cls == "dgCMatrix": 71 | mat = csc_matrix( 72 | ( 73 | robject["attributes"]["x"]["data"], 74 | robject["attributes"]["i"]["data"], 75 | robject["attributes"]["p"]["data"], 76 | ), 77 | shape=tuple(robject["attributes"]["Dim"]["data"].tolist()), 78 | ) 79 | elif _cls == "dgRMatrix": 80 | mat = csr_matrix( 81 | ( 82 | robject["attributes"]["x"]["data"], 83 | robject["attributes"]["i"]["data"], 84 | robject["attributes"]["p"]["data"], 85 | ), 86 | shape=tuple(robject["attributes"]["Dim"]["data"].tolist()), 87 | ) 88 | elif _cls == "dgTMatrix": 89 | mat = csr_matrix( 90 | ( 91 | robject["attributes"]["x"]["data"], 92 | ( 93 | robject["attributes"]["i"]["data"], 94 | robject["attributes"]["j"]["data"], 95 | ), 96 | ), 97 | shape=tuple(robject["attributes"]["Dim"]["data"].tolist()), 98 | ) 99 | 100 | names = None 101 | if "Dimnames" in robject["attributes"]: 102 | names = _dispatcher(robject["attributes"]["Dimnames"], **kwargs) 103 | if names is not None and len(names) > 0: 104 | # Use the wrapper class onyly if names are available 105 | # for atleast one dimension 106 | if not all(x is None for x in names): 107 | return MatrixWrapper(mat, names) 108 | 109 | return mat 110 | 111 | 112 | def _as_dense_matrix(robject, order: Literal["C", "F"] = "F", **kwargs) -> ndarray: 113 | """Convert an R matrix to a `NumPy` array. 114 | 115 | Args: 116 | robject: 117 | Dictionary containing parsed R matrix data. 118 | 119 | order: 120 | Memory layout for the array. 121 | 'C' for row-major, 'F' for column-major (default). 122 | 123 | **kwargs: 124 | Additional arguments. 125 | 126 | Returns: 127 | A NumPy array or wrapped array if dimension names exist. 
128 | """ 129 | _cls = get_class(robject) 130 | 131 | if order not in ["C", "F"]: 132 | raise ValueError("order must be either 'C' or 'F'.") 133 | 134 | if _cls not in ["ndarray"]: 135 | raise TypeError(f"obj is not a supported dense matrix format, but is `{_cls}`.") 136 | 137 | mat = ndarray( 138 | shape=tuple(robject["attributes"]["dim"]["data"].tolist()), 139 | dtype=robject["data"].dtype, 140 | buffer=robject["data"], 141 | order=order, 142 | ) 143 | 144 | names = None 145 | if "dimnames" in robject["attributes"]: 146 | names = _dispatcher(robject["attributes"]["dimnames"], **kwargs) 147 | if names is not None and len(names) > 0: 148 | return MatrixWrapper(mat, names) 149 | 150 | return mat 151 | 152 | 153 | def read_dgcmatrix(robject: dict, **kwargs): 154 | """Parse an R dgCMatrix (sparse column matrix). 155 | 156 | Args: 157 | robject: 158 | Dictionary containing parsed dgCMatrix data. 159 | 160 | **kwargs: 161 | Additional arguments. 162 | 163 | Returns: 164 | Parsed sparse column matrix. 165 | """ 166 | return _as_sparse_matrix(robject, **kwargs) 167 | 168 | 169 | def read_dgrmatrix(robject: dict, **kwargs): 170 | """Parse an R dgRMatrix (sparse row matrix). 171 | 172 | Args: 173 | robject: 174 | Dictionary containing parsed dgRMatrix data. 175 | 176 | **kwargs: 177 | Additional arguments. 178 | 179 | Returns: 180 | Parsed sparse row matrix. 181 | """ 182 | return _as_sparse_matrix(robject, **kwargs) 183 | 184 | 185 | def read_dgtmatrix(robject: dict, **kwargs): 186 | """Parse an R dgTMatrix (sparse triplet matrix). 187 | 188 | Args: 189 | robject: 190 | Dictionary containing parsed dgTMatrix data. 191 | 192 | **kwargs: 193 | Additional arguments. 194 | 195 | Returns: 196 | Parsed sparse matrix. 197 | """ 198 | return _as_sparse_matrix(robject, **kwargs) 199 | 200 | 201 | def read_ndarray(robject: dict, order: Literal["C", "F"] = "F", **kwargs) -> ndarray: 202 | """Parse an R matrix as a NumPy array.
203 | 204 | Args: 205 | robject: 206 | Dictionary containing parsed R matrix data. 207 | 208 | order: 209 | Memory layout for the array. 210 | 211 | **kwargs: 212 | Additional arguments. 213 | 214 | Returns: 215 | Parsed dense array. 216 | """ 217 | return _as_dense_matrix(robject, order=order, **kwargs) 218 | -------------------------------------------------------------------------------- /src/rds2py/read_rle.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing R's Rle (Run-length encoding) objects. 2 | 3 | This module provides functionality to convert R's Rle (Run-length encoding) objects into Python lists, expanding the 4 | compressed representation into its full form. 5 | """ 6 | 7 | from .generics import _dispatcher 8 | from .rdsutils import get_class 9 | 10 | __author__ = "jkanche" 11 | __copyright__ = "jkanche" 12 | __license__ = "MIT" 13 | 14 | 15 | def read_rle(robject: dict, **kwargs) -> list: 16 | """Convert an R Rle object to a Python list. 17 | 18 | Args: 19 | robject: 20 | Dictionary containing parsed Rle data. 21 | 22 | **kwargs: 23 | Additional arguments. 24 | 25 | Returns: 26 | Expanded list where each value is repeated according to its run length.
27 | 28 | Example: 29 | >>> # For Rle with values=[1,2] and lengths=[3,2] 30 | >>> result = read_rle(robject) 31 | >>> print(result) 32 | [1, 1, 1, 2, 2] 33 | """ 34 | _cls = get_class(robject) 35 | 36 | if _cls != "Rle": 37 | raise RuntimeError(f"`robject` does not contain a 'Rle' object, contains `{_cls}`.") 38 | 39 | data = list(_dispatcher(robject["attributes"]["values"], **kwargs)) 40 | 41 | if "lengths" in robject["attributes"]: 42 | lengths = _dispatcher(robject["attributes"]["lengths"], **kwargs) 43 | else: 44 | lengths = [1] * len(data) 45 | 46 | final_vec = [] 47 | for i, x in enumerate(lengths): 48 | final_vec.extend([data[i]] * x) 49 | 50 | return final_vec 51 | -------------------------------------------------------------------------------- /src/rds2py/read_sce.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing Bioconductor `SingleCellExperiment` objects. 2 | 3 | This module provides parsers for converting Bioconductor's `SingleCellExperiment` 4 | objects into their Python equivalents, handling the complex structure of single-cell 5 | data including multiple assays, reduced dimensions, and alternative experiments. 
6 | """ 7 | 8 | from .generics import _dispatcher 9 | from .rdsutils import get_class 10 | 11 | __author__ = "jkanche" 12 | __copyright__ = "jkanche" 13 | __license__ = "MIT" 14 | 15 | 16 | def read_alts_summarized_experiment_by_column(robject: dict, **kwargs): 17 | """Parse alternative experiments in a SingleCellExperiment.""" 18 | _cls = get_class(robject) 19 | 20 | if _cls not in ["SummarizedExperimentByColumn"]: 21 | raise RuntimeError(f"`robject` does not contain a 'SummarizedExperimentByColumn' object, contains `{_cls}`.") 22 | 23 | objs = {} 24 | 25 | for key, val in robject["attributes"].items(): 26 | objs[key] = _dispatcher(val, **kwargs) 27 | 28 | return objs 29 | 30 | 31 | def read_single_cell_experiment(robject: dict, **kwargs): 32 | """Convert an R SingleCellExperiment to Python SingleCellExperiment. 33 | 34 | Args: 35 | robject: 36 | Dictionary containing parsed SingleCellExperiment data. 37 | 38 | **kwargs: 39 | Additional arguments. 40 | 41 | Returns: 42 | A Python SingleCellExperiment object containing 43 | the assay data and associated metadata. 44 | """ 45 | 46 | _cls = get_class(robject) 47 | 48 | if _cls not in ["SingleCellExperiment"]: 49 | raise RuntimeError(f"`robject` does not contain a 'SingleCellExperiment' object, contains `{_cls}`.") 50 | 51 | robject["class_name"] = "RangedSummarizedExperiment" 52 | _rse = _dispatcher(robject, **kwargs) 53 | 54 | # check red. 
dims, alternative expts 55 | robj_reduced_dims = None 56 | robj_alt_exps = None 57 | col_attrs = list( 58 | _dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["attributes"]["names"], **kwargs) 59 | ) 60 | 61 | for idx in range(len(col_attrs)): 62 | idx_col = col_attrs[idx] 63 | idx_value = robject["attributes"]["int_colData"]["attributes"]["listData"]["data"][idx] 64 | 65 | if idx_col == "reducedDims" and idx_value.get("data", None) is not None: 66 | robj_reduced_dims = _dispatcher(idx_value, **kwargs) 67 | 68 | if idx_col == "altExps": 69 | alt_names = list(_dispatcher(idx_value["attributes"]["listData"]["attributes"]["names"], **kwargs)) 70 | robj_alt_exps = {} 71 | for idx, altn in enumerate(alt_names): 72 | robj_alt_exps[altn] = _dispatcher(idx_value["attributes"]["listData"]["data"][idx], **kwargs)["se"] 73 | 74 | # ignore colpairs for now, does anyone even use this ? 75 | # if col == "colPairs": 76 | 77 | from singlecellexperiment import SingleCellExperiment 78 | 79 | return SingleCellExperiment( 80 | assays=_rse.assays, 81 | row_data=_rse.row_data, 82 | column_data=_rse.column_data, 83 | row_ranges=_rse.row_ranges, 84 | alternative_experiments=robj_alt_exps, 85 | reduced_dims=robj_reduced_dims, 86 | ) 87 | -------------------------------------------------------------------------------- /src/rds2py/read_se.py: -------------------------------------------------------------------------------- 1 | """Functions for parsing Bioconductor `SummarizedExperiment` objects. 2 | 3 | This module provides parsers for converting Bioconductor's `SummarizedExperiment` 4 | objects into their Python equivalents. 
5 | """ 6 | 7 | from .generics import _dispatcher 8 | from .rdsutils import get_class 9 | from .read_matrix import MatrixWrapper 10 | 11 | __author__ = "jkanche" 12 | __copyright__ = "jkanche" 13 | __license__ = "MIT" 14 | 15 | 16 | def _sanitize_empty_frame(frame, nrows): 17 | if frame.shape == (0, 0): 18 | from biocframe import BiocFrame 19 | 20 | return BiocFrame(number_of_rows=nrows) 21 | 22 | return frame 23 | 24 | 25 | def _sanitize_assays(assays): 26 | res = {} 27 | for k, v in assays.items(): 28 | if isinstance(v, MatrixWrapper): 29 | res[k] = v.matrix 30 | else: 31 | res[k] = v 32 | 33 | return res 34 | 35 | 36 | def read_summarized_experiment(robject: dict, **kwargs): 37 | """Convert an R SummarizedExperiment to Python 38 | :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. 39 | 40 | Args: 41 | robject: 42 | Dictionary containing parsed SummarizedExperiment data. 43 | 44 | **kwargs: 45 | Additional arguments. 46 | 47 | Returns: 48 | A `SummarizedExperiment` from the R object. 
49 | """ 50 | 51 | _cls = get_class(robject) 52 | 53 | if _cls not in ["SummarizedExperiment"]: 54 | raise RuntimeError(f"`robject` does not contain a 'SummarizedExperiment' object, contains `{_cls}`.") 55 | # parse assays names 56 | robj_asys = {} 57 | assay_dims = None 58 | asy_names = list( 59 | _dispatcher( 60 | robject["attributes"]["assays"]["attributes"]["data"]["attributes"]["listData"]["attributes"]["names"], 61 | **kwargs, 62 | ) 63 | ) 64 | for idx, asyname in enumerate(asy_names): 65 | idx_asy = robject["attributes"]["assays"]["attributes"]["data"]["attributes"]["listData"]["data"][idx] 66 | 67 | robj_asys[asyname] = _dispatcher(idx_asy, **kwargs) 68 | if assay_dims is None: 69 | assay_dims = robj_asys[asyname].shape 70 | 71 | # parse coldata 72 | robj_coldata = _sanitize_empty_frame(_dispatcher(robject["attributes"]["colData"], **kwargs), assay_dims[1]) 73 | 74 | # parse rowdata 75 | robj_rowdata = _sanitize_empty_frame(_dispatcher(robject["attributes"]["elementMetadata"], **kwargs), assay_dims[0]) 76 | 77 | from summarizedexperiment import SummarizedExperiment 78 | 79 | return SummarizedExperiment( 80 | assays=_sanitize_assays(robj_asys), 81 | row_data=robj_rowdata, 82 | column_data=robj_coldata, 83 | ) 84 | 85 | 86 | def read_ranged_summarized_experiment(robject: dict, **kwargs): 87 | """Convert an R RangedSummarizedExperiment to its Python equivalent. 88 | 89 | Args: 90 | robject: 91 | Dictionary containing parsed SummarizedExperiment data. 92 | 93 | **kwargs: 94 | Additional arguments. 95 | 96 | Returns: 97 | A Python RangedSummarizedExperiment object. 
98 | """ 99 | 100 | _cls = get_class(robject) 101 | 102 | if _cls not in ["RangedSummarizedExperiment"]: 103 | raise RuntimeError(f"`robject` does not contain a 'RangedSummarizedExperiment' object, contains `{_cls}`.") 104 | 105 | robject["class_name"] = "SummarizedExperiment" 106 | _se = _dispatcher(robject, **kwargs) 107 | 108 | # parse rowRanges 109 | row_ranges_data = None 110 | if "rowRanges" in robject["attributes"]: 111 | row_ranges_data = _dispatcher(robject["attributes"]["rowRanges"], **kwargs) 112 | 113 | from summarizedexperiment import RangedSummarizedExperiment 114 | 115 | return RangedSummarizedExperiment( 116 | assays=_se.assays, 117 | row_data=_se.row_data, 118 | column_data=_se.column_data, 119 | row_ranges=row_ranges_data, 120 | ) 121 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Dummy conftest.py for rds2py. 2 | 3 | If you don't know what this is for, just leave it empty. 
4 | Read more about conftest.py under: 5 | - https://docs.pytest.org/en/stable/fixture.html 6 | - https://docs.pytest.org/en/stable/writing_plugins.html 7 | """ 8 | 9 | # import pytest 10 | -------------------------------------------------------------------------------- /tests/data/atomic_attr.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_attr.rds -------------------------------------------------------------------------------- /tests/data/atomic_chars.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_chars.rds -------------------------------------------------------------------------------- /tests/data/atomic_chars_unicode.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_chars_unicode.rds -------------------------------------------------------------------------------- /tests/data/atomic_complex.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_complex.rds -------------------------------------------------------------------------------- /tests/data/atomic_double.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_double.rds -------------------------------------------------------------------------------- /tests/data/atomic_ints.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_ints.rds -------------------------------------------------------------------------------- /tests/data/atomic_ints_with_names.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_ints_with_names.rds -------------------------------------------------------------------------------- /tests/data/atomic_logical.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_logical.rds -------------------------------------------------------------------------------- /tests/data/atomic_logical_wNA.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_logical_wNA.rds -------------------------------------------------------------------------------- /tests/data/atomic_raw.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/atomic_raw.rds -------------------------------------------------------------------------------- /tests/data/data.frame.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/data.frame.rds -------------------------------------------------------------------------------- /tests/data/example_anndata.h5ad: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/example_anndata.h5ad -------------------------------------------------------------------------------- /tests/data/generate_files.R: -------------------------------------------------------------------------------- 1 | # # pairlist 2 | 3 | # y <- pairlist(runif(10), runif(20), runif(30)) 4 | # saveRDS(y, file="pairlist.rds") 5 | 6 | # y <- pairlist(sample(letters), pairlist(sample(11), runif(12))) 7 | # saveRDS(y, file="pairlist_nested.rds") 8 | 9 | # y <- pairlist(foo=sample(letters), bar=pairlist(whee=sample(11), bum=runif(12))) # with names 10 | # saveRDS(y, file="pairlist_names.rds") 11 | 12 | # y <- pairlist(aaron=sample(letters), bar=list(sample(11), runif(12))) 13 | # attr(y, "foo") <- "bar" 14 | # saveRDS(y, file="pairlist_attr.rds") 15 | 16 | 17 | # altrep 18 | 19 | # scenarios <- 1:15 20 | # saveRDS(y, file="altrep_series.rds") 21 | 22 | # x <- 1:100 23 | # names(x) <- sprintf("GENE_%s", seq_along(x)) 24 | # saveRDS(x, file="altrep_attr.rds") 25 | 26 | # x <- as.character(1:100) 27 | # saveRDS(x, file="altrep_strings_deferred.rds") 28 | 29 | # x <- c(NA_integer_, 1:10, NA_integer_) 30 | # x <- as.character(x) 31 | # saveRDS(x, file="altrep_strings_wNA.rds") 32 | 33 | # x <- as.character(1:100 * 2) 34 | # saveRDS(x, file="altrep_double_deferred.rds") 35 | 36 | # x <- c(NaN, 1:10, Inf, -Inf, NA) 37 | # x <- as.character(x) 38 | # saveRDS(x, file="altrep_double_wNA.rds") 39 | 40 | # atomic 41 | 42 | y <- rpois(112, lambda=8) 43 | saveRDS(y, file="atomic_ints.rds") 44 | 45 | y <- rbinom(55, 1, 0.5) == 0 46 | saveRDS(y, file="atomic_logical.rds") 47 | 48 | y <- rbinom(999, 1, 0.5) == 0 49 | y[sample(length(y), 10)] <- NA 50 | saveRDS(y, file="atomic_logical_wNA.rds") 51 | 52 | y <- rnorm(99) 53 | saveRDS(y, file="atomic_double.rds") 54 | 55 | y <- as.raw(sample(256, 99, replace=TRUE) - 1) 56 | saveRDS(y, file="atomic_raw.rds") 57 | 58 | y <- rnorm(99) + rnorm(99) 
* 1i 59 | saveRDS(y, file="atomic_complex.rds") 60 | 61 | y <- sample(LETTERS) 62 | saveRDS(y, file="atomic_chars.rds") 63 | 64 | y <- c("α-globin", "😀😀😀", "fußball", "Hervé Pagès") 65 | saveRDS(y, file="atomic_chars_unicode.rds") 66 | 67 | vals <- sample(.Machine$integer.max, 1000) 68 | names(vals) <- sprintf("GENE_%i", seq_along(vals)) 69 | attr(vals, "foo") <- c("BAR", "bar", "Bar") 70 | class(vals) <- "frog" 71 | saveRDS(vals, file="atomic_attr.rds") 72 | 73 | # scalars 74 | 75 | y <- 10 76 | saveRDS(y, file="scalar_int.rds") 77 | 78 | # lists 79 | 80 | y <- list(runif(10), runif(20), runif(30)) 81 | saveRDS(y, file="lists.rds") 82 | 83 | y <- list(sample(letters), list(sample(11), runif(12))) 84 | saveRDS(y, file="lists_nested.rds") 85 | 86 | y <- list(list(2, 6), list(5, c("cat", "dog", "bouse"), list(sample(99), runif(20)))) 87 | saveRDS(y, file="lists_nested_deep.rds") 88 | 89 | df <- data.frame(xxx=runif(19), YYY=sample(letters, 19), ZZZ=rbinom(19, 1, 0.4) == 0) 90 | saveRDS(df, file="lists_df.rds") 91 | 92 | rownames(df) <- paste0("FOO-", LETTERS[1:19]) 93 | saveRDS(df, file="lists_df_rownames.rds") 94 | 95 | # S4 96 | 97 | y <- Matrix::rsparsematrix(100, 10, 0.05) 98 | saveRDS(y, file="s4_matrix.rds") 99 | 100 | rownames(y) <- paste("row", 1:nrow(y), sep="_") 101 | saveRDS(y, file="matrix_with_row_names.rds") 102 | 103 | colnames(y) <- paste("col", 1:ncol(y), sep="_") 104 | saveRDS(y, file="matrix_with_dim_names.rds") 105 | 106 | setClass("FOO", slots=c(bar="integer")) 107 | y <- new("FOO", bar=2L) 108 | saveRDS(y, file="s4_class.rds") 109 | 110 | # GenomicRanges 111 | 112 | gr <- GRanges( 113 | seqnames = Rle(c("chr1", "chr2", "chr1", "chr3"), c(1, 3, 2, 4)), 114 | ranges = IRanges(101:110, end = 111:120, names = head(letters, 10)), 115 | strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)), 116 | score = 1:10, 117 | GC = seq(1, 0, length=10)) 118 | 119 | saveRDS(gr, file="granges.rds") 120 | 121 | # factors 122 | 123 | f1 <- 
factor(c("chr1", "chr2", "chr1", "chr3")) 124 | saveRDS(f1, "simple_factors.rds") 125 | 126 | # Rle 127 | x2 <- Rle(LETTERS[c(21:26, 25:26)], 8:1) 128 | saveRDS(x2, "simple_rle.rds") 129 | 130 | 131 | # SummarizedExperiment 132 | 133 | nrows <- 200 134 | ncols <- 6 135 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) 136 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)), 137 | IRanges(floor(runif(200, 1e5, 1e6)), width=100), 138 | strand=sample(c("+", "-"), 200, TRUE), 139 | feature_id=sprintf("ID%03d", 1:200)) 140 | rowd <- DataFrame(seqs = rep(c("chr1", "chr2"), c(50, 150))) 141 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), 142 | row.names=LETTERS[1:6]) 143 | 144 | se <- SummarizedExperiment(assays=list(counts=counts), 145 | rowData = rowd, colData=colData) 146 | 147 | rse <- SummarizedExperiment(assays=list(counts=counts), 148 | rowRanges = rowRanges, colData=colData) 149 | saveRDS(se, "sumexpt.rds") 150 | saveRDS(rse, "ranged_se.rds") 151 | 152 | # SingleCell Experiment 153 | 154 | library(scRNAseq) 155 | sce <- ReprocessedAllenData("tophat_counts") 156 | sce_subset <- sce[1:100, 1:100] 157 | saveRDS(sce_subset, "simple_sce.rds") 158 | 159 | # lists 160 | 161 | x <- list(github = "jkanche", fullname=c("Kancherla", "Jayaram"), 162 | collab=list(github = "ltla", fullname=c("Lun", "Aaron"))) 163 | saveRDS(x, "simple_list.rds") 164 | 165 | # frames 166 | dframe <- as.data.frame(df)  # `df` is the data.frame built in the "lists" section; `lists_df` was never defined 167 | saveRDS(dframe, "data.frame.rds") 168 | 169 | # MAE 170 | library(MultiAssayExperiment) 171 | patient.data <- data.frame(sex=c("M", "F", "M", "F"), 172 | age=38:41, 173 | row.names=c("Jack", "Jill", "Bob", "Barbara")) 174 | 175 | exprss1 <- matrix(rnorm(16), ncol = 4, 176 | dimnames = list(sprintf("ENST00000%i", sample(288754:290000, 4)), 177 | c("Jack", "Jill", "Bob", "Bobby"))) 178 | exprss2 <- matrix(rnorm(12), ncol = 3, 179 | dimnames = list(sprintf("ENST00000%i", sample(288754:290000, 4)), 180 | c("Jack", "Jane", "Bob"))) 181 | doubleExp <-
list("methyl 2k" = exprss1, "methyl 3k" = exprss2) 182 | simpleMultiAssay <- MultiAssayExperiment(experiments=doubleExp) 183 | simpleMultiAssay2 <- MultiAssayExperiment(experiments=doubleExp, 184 | colData=patient.data) 185 | saveRDS(simpleMultiAssay2, "simple_mae.rds") 186 | 187 | ## Delayed Arrays 188 | 189 | library(zellkonverter) 190 | h5ad_file <- system.file("extdata", "example_anndata.h5ad", 191 | package="zellkonverter") 192 | h5ls(h5ad_file) 193 | 194 | M <- H5SparseMatrix(h5ad_file, "/obsp/connectivities") 195 | saveRDS(M, "h5sparse.rds") 196 | -------------------------------------------------------------------------------- /tests/data/granges.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/granges.rds -------------------------------------------------------------------------------- /tests/data/grangeslist.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/grangeslist.rds -------------------------------------------------------------------------------- /tests/data/h5sparse.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/h5sparse.rds -------------------------------------------------------------------------------- /tests/data/lists.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists.rds -------------------------------------------------------------------------------- /tests/data/lists_df.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_df.rds -------------------------------------------------------------------------------- /tests/data/lists_df_rownames.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_df_rownames.rds -------------------------------------------------------------------------------- /tests/data/lists_nested.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_nested.rds -------------------------------------------------------------------------------- /tests/data/lists_nested_deep.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/lists_nested_deep.rds -------------------------------------------------------------------------------- /tests/data/matrix_with_dim_names.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/matrix_with_dim_names.rds -------------------------------------------------------------------------------- /tests/data/matrix_with_row_names.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/matrix_with_row_names.rds -------------------------------------------------------------------------------- /tests/data/numpy_dtype.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/numpy_dtype.rds -------------------------------------------------------------------------------- /tests/data/ranged_se.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/ranged_se.rds -------------------------------------------------------------------------------- /tests/data/s4_class.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_class.rds -------------------------------------------------------------------------------- /tests/data/s4_dense_matrix.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_dense_matrix.rds -------------------------------------------------------------------------------- /tests/data/s4_matrix.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_matrix.rds -------------------------------------------------------------------------------- /tests/data/s4_matrix_dgt.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/s4_matrix_dgt.rds -------------------------------------------------------------------------------- /tests/data/scalar_int.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/scalar_int.rds 
-------------------------------------------------------------------------------- /tests/data/simple_factors.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_factors.rds -------------------------------------------------------------------------------- /tests/data/simple_list.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_list.rds -------------------------------------------------------------------------------- /tests/data/simple_mae.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_mae.rds -------------------------------------------------------------------------------- /tests/data/simple_rle.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_rle.rds -------------------------------------------------------------------------------- /tests/data/simple_sce.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/simple_sce.rds -------------------------------------------------------------------------------- /tests/data/sumexpt.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BiocPy/rds2py/22ea37cd32202acebc1dd8a28d2a4974f2d4316f/tests/data/sumexpt.rds -------------------------------------------------------------------------------- /tests/test_atomics.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | from biocutils import BooleanList, FloatList, IntegerList, StringList 6 | 7 | __author__ = "jkanche" 8 | __copyright__ = "jkanche" 9 | __license__ = "MIT" 10 | 11 | ## With attributes 12 | 13 | 14 | def test_read_atomic_attrs(): 15 | data = read_rds("tests/data/atomic_attr.rds") 16 | 17 | assert data is not None 18 | assert isinstance(data, dict) 19 | assert data["attributes"]["class"]["data"][0] == "frog" 20 | 21 | 22 | ## Booleans 23 | 24 | 25 | def test_read_atomic_logical(): 26 | arr = read_rds("tests/data/atomic_logical.rds") 27 | 28 | assert arr is not None 29 | assert isinstance(arr, BooleanList) 30 | assert len(arr) > 0 31 | 32 | 33 | def test_read_atomic_logical_na(): 34 | arr = read_rds("tests/data/atomic_logical_wNA.rds") 35 | 36 | assert arr is not None 37 | assert isinstance(arr, BooleanList) 38 | assert len(arr) > 0 39 | 40 | 41 | ## Doubles/Floats 42 | 43 | 44 | def test_read_atomic_double(): 45 | obj = read_rds("tests/data/atomic_double.rds") 46 | 47 | assert obj is not None 48 | assert isinstance(obj, FloatList) 49 | assert len(obj) == 99 50 | 51 | 52 | ## Ints 53 | 54 | 55 | def test_read_atomic_ints(): 56 | arr = read_rds("tests/data/atomic_ints.rds") 57 | 58 | assert arr is not None 59 | assert isinstance(arr, IntegerList) 60 | assert len(arr) == 112 61 | assert arr.names is None 62 | 63 | 64 | def test_read_atomic_ints_with_names(): 65 | arr = read_rds("tests/data/atomic_ints_with_names.rds") 66 | 67 | assert arr is not None 68 | assert isinstance(arr, IntegerList) 69 | assert arr.names is not None 70 | assert len(arr) == 112 71 | 72 | 73 | ## Strings 74 | 75 | 76 | def test_read_atomic_chars(): 77 | arr = read_rds("tests/data/atomic_chars.rds") 78 | 79 | assert arr is not None 80 | assert isinstance(arr, StringList) 81 | assert len(arr) == 26 82 | assert arr.names is None 83 | 84 | 85 | def 
test_read_atomic_chars_unicode(): 86 | arr = read_rds("tests/data/atomic_chars_unicode.rds") 87 | 88 | assert arr is not None 89 | assert isinstance(arr, StringList) 90 | assert len(arr) == 4 91 | assert arr.names is None 92 | 93 | 94 | ## Test scalar values, defaults to a vector 95 | 96 | 97 | def test_read_scalar_float(): 98 | obj = read_rds("tests/data/scalar_int.rds") 99 | 100 | assert obj is not None 101 | assert isinstance(obj, FloatList) 102 | assert len(obj) == 1 103 | assert obj[0] == 10.0 104 | -------------------------------------------------------------------------------- /tests/test_delayedmatrices.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | from hdf5array import Hdf5CompressedSparseMatrix 5 | 6 | __author__ = "jkanche" 7 | __copyright__ = "jkanche" 8 | __license__ = "MIT" 9 | 10 | @pytest.mark.skip(reason="delayedarray uses full file paths. this should be run locally.") 11 | def test_read_h5sparse(): 12 | array = read_rds("tests/data/h5sparse.rds") 13 | 14 | assert array is not None 15 | assert isinstance(array, Hdf5CompressedSparseMatrix) 16 | assert array.shape == (200, 200) 17 | -------------------------------------------------------------------------------- /tests/test_dict.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | __author__ = "jkanche" 6 | __copyright__ = "jkanche" 7 | __license__ = "MIT" 8 | 9 | 10 | def test_read_simple_lists(): 11 | obj = read_rds("tests/data/simple_list.rds") 12 | 13 | assert obj is not None 14 | assert len(obj) > 0 15 | 16 | assert "collab" in obj 17 | assert len(obj["collab"]) > 0 18 | 19 | 20 | def test_read_atomic_lists(): 21 | obj = read_rds("tests/data/lists.rds") 22 | 23 | assert obj is not None 24 | assert len(obj) > 0 25 | 26 | 27 | def test_read_atomic_lists_nested(): 28 | obj = 
read_rds("tests/data/lists_nested.rds") 29 | 30 | assert obj is not None 31 | assert len(obj) > 0 32 | 33 | 34 | def test_read_atomic_lists_nested_deep(): 35 | obj = read_rds("tests/data/lists_nested_deep.rds") 36 | 37 | assert obj is not None 38 | assert len(obj) > 0 39 | 40 | 41 | def test_read_atomic_lists_df(): 42 | obj = read_rds("tests/data/lists_df.rds") 43 | 44 | assert obj is not None 45 | assert len(obj) > 0 46 | 47 | 48 | def test_read_atomic_lists_nested_deep_rownames(): 49 | obj = read_rds("tests/data/lists_df_rownames.rds") 50 | 51 | assert obj is not None 52 | assert len(obj) > 0 53 | -------------------------------------------------------------------------------- /tests/test_factors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | __author__ = "jkanche" 6 | __copyright__ = "jkanche" 7 | __license__ = "MIT" 8 | 9 | ## With attributes 10 | 11 | 12 | def test_read_simple_factors(): 13 | data = read_rds("tests/data/simple_factors.rds") 14 | 15 | assert data is not None 16 | assert len(data) == 4 17 | -------------------------------------------------------------------------------- /tests/test_frames.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | from biocframe import BiocFrame 5 | 6 | __author__ = "jkanche" 7 | __copyright__ = "jkanche" 8 | __license__ = "MIT" 9 | 10 | 11 | def test_read_atomic_lists_df(): 12 | frame = read_rds("tests/data/lists_df.rds") 13 | 14 | assert frame is not None 15 | assert isinstance(frame, BiocFrame) 16 | assert len(frame) > 0 17 | 18 | 19 | def test_read_atomic_lists_nested_deep_rownames(): 20 | frame = read_rds("tests/data/lists_df_rownames.rds") 21 | 22 | assert frame is not None 23 | assert isinstance(frame, BiocFrame) 24 | assert len(frame) > 0 25 | -------------------------------------------------------------------------------- 
/tests/test_granges.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | from genomicranges import GenomicRanges, GenomicRangesList 6 | import numpy as np 7 | 8 | __author__ = "jkanche" 9 | __copyright__ = "jkanche" 10 | __license__ = "MIT" 11 | 12 | 13 | def test_granges(): 14 | gr = read_rds("tests/data/granges.rds") 15 | 16 | assert isinstance(gr, GenomicRanges) 17 | assert gr.get_seqnames("list") == [ 18 | "chr1", 19 | "chr2", 20 | "chr2", 21 | "chr2", 22 | "chr1", 23 | "chr1", 24 | "chr3", 25 | "chr3", 26 | "chr3", 27 | "chr3", 28 | ] 29 | assert np.allclose(gr.get_start(), range(101, 111)) 30 | assert len(gr.get_mcols().get_column_names()) == 2 31 | assert gr.get_strand("list") == ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"] 32 | 33 | 34 | def test_granges_list(): 35 | gr = read_rds("tests/data/grangeslist.rds") 36 | 37 | assert isinstance(gr, GenomicRangesList) 38 | assert len(gr) == 5 39 | -------------------------------------------------------------------------------- /tests/test_mae.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | from multiassayexperiment import MultiAssayExperiment 6 | 7 | __author__ = "jkanche" 8 | __copyright__ = "jkanche" 9 | __license__ = "MIT" 10 | 11 | 12 | def test_read_sce(): 13 | data = read_rds("tests/data/simple_mae.rds") 14 | 15 | assert data is not None 16 | assert isinstance(data, MultiAssayExperiment) 17 | assert len(data.get_experiment_names()) == 2 18 | -------------------------------------------------------------------------------- /tests/test_matrices.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | import numpy as np 5 | from scipy import sparse as sp 6 | 7 | from rds2py.read_matrix import MatrixWrapper 8 | 9 | __author__ = 
"jkanche" 10 | __copyright__ = "jkanche" 11 | __license__ = "MIT" 12 | 13 | 14 | def test_read_s4_matrix_dgc(): 15 | array = read_rds("tests/data/s4_matrix.rds") 16 | 17 | assert array is not None 18 | assert isinstance(array, sp.spmatrix) 19 | 20 | def test_read_s4_matrix_dgc_with_rownames(): 21 | array = read_rds("tests/data/matrix_with_row_names.rds") 22 | 23 | assert array is not None 24 | assert isinstance(array, MatrixWrapper) 25 | assert len(array.dimnames[0]) == 100 26 | assert array.dimnames[1] is None 27 | 28 | 29 | def test_read_s4_matrix_dgc_with_bothnames(): 30 | array = read_rds("tests/data/matrix_with_dim_names.rds") 31 | 32 | assert array is not None 33 | assert isinstance(array, MatrixWrapper) 34 | assert len(array.dimnames[0]) == 100 35 | assert len(array.dimnames[1]) == 10 36 | 37 | def test_read_s4_matrix_dgt(): 38 | array = read_rds("tests/data/s4_matrix_dgt.rds") 39 | 40 | assert array is not None 41 | assert isinstance(array, sp.spmatrix) 42 | 43 | 44 | def test_read_dense_numpy_dtype(): 45 | array = read_rds("tests/data/numpy_dtype.rds") 46 | 47 | assert array is not None 48 | assert isinstance(array, MatrixWrapper) 49 | assert isinstance(array.matrix, np.ndarray) 50 | assert array.dimnames is not None 51 | assert len(array.dimnames) == len(array.matrix.shape) 52 | -------------------------------------------------------------------------------- /tests/test_rle.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | from biocutils import BooleanList, FloatList, IntegerList, StringList 6 | 7 | __author__ = "jkanche" 8 | __copyright__ = "jkanche" 9 | __license__ = "MIT" 10 | 11 | ## With attributes 12 | 13 | 14 | def test_read_simple_rle(): 15 | data = read_rds("tests/data/simple_rle.rds") 16 | 17 | assert data is not None 18 | assert len(data) == 36 19 | -------------------------------------------------------------------------------- /tests/test_s4.py: 
-------------------------------------------------------------------------------- 1 | # import pytest 2 | 3 | from rds2py.PyRdsReader import PyRdsParser 4 | 5 | # __author__ = "jkanche" 6 | # __copyright__ = "jkanche" 7 | # __license__ = "MIT" 8 | 9 | 10 | def test_read_s4_class(): 11 | parsed_obj = PyRdsParser("tests/data/s4_class.rds") 12 | robject_obj = parsed_obj.parse() 13 | 14 | assert robject_obj is not None 15 | 16 | 17 | def test_read_s4_matrix(): 18 | parsed_obj = PyRdsParser("tests/data/s4_matrix.rds") 19 | robject_obj = parsed_obj.parse() 20 | 21 | assert robject_obj is not None 22 | 23 | 24 | def test_read_s4_matrix_dgt(): 25 | parsed_obj = PyRdsParser("tests/data/s4_matrix_dgt.rds") 26 | robject_obj = parsed_obj.parse() 27 | 28 | assert robject_obj is not None 29 | -------------------------------------------------------------------------------- /tests/test_sce.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | from singlecellexperiment import SingleCellExperiment 6 | 7 | __author__ = "jkanche" 8 | __copyright__ = "jkanche" 9 | __license__ = "MIT" 10 | 11 | 12 | def test_read_sce(): 13 | data = read_rds("tests/data/simple_sce.rds") 14 | 15 | assert data is not None 16 | assert isinstance(data, SingleCellExperiment) 17 | assert data.shape == (100, 100) 18 | -------------------------------------------------------------------------------- /tests/test_se.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rds2py import read_rds 4 | 5 | from summarizedexperiment import SummarizedExperiment, RangedSummarizedExperiment 6 | 7 | __author__ = "jkanche" 8 | __copyright__ = "jkanche" 9 | __license__ = "MIT" 10 | 11 | 12 | def test_read_summ_expt(): 13 | data = read_rds("tests/data/sumexpt.rds") 14 | 15 | assert data is not None 16 | assert isinstance(data, SummarizedExperiment) 17 | assert data.shape == 
(200, 6) 18 | 19 | 20 | def test_read_ranged_summ_expt(): 21 | data = read_rds("tests/data/ranged_se.rds") 22 | 23 | assert data is not None 24 | assert isinstance(data, RangedSummarizedExperiment) 25 | assert data.shape == (200, 6) 26 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox configuration file 2 | # Read more under https://tox.wiki/ 3 | # THIS SCRIPT IS SUPPOSED TO BE AN EXAMPLE. MODIFY IT ACCORDING TO YOUR NEEDS! 4 | 5 | [tox] 6 | minversion = 3.24 7 | envlist = default 8 | isolated_build = True 9 | 10 | 11 | [testenv] 12 | description = Invoke pytest to run automated tests 13 | setenv = 14 | TOXINIDIR = {toxinidir} 15 | passenv = 16 | HOME 17 | SETUPTOOLS_* 18 | extras = 19 | testing 20 | commands = 21 | pytest {posargs} 22 | 23 | 24 | # # To run `tox -e lint` you need to make sure you have a 25 | # # `.pre-commit-config.yaml` file. See https://pre-commit.com 26 | # [testenv:lint] 27 | # description = Perform static analysis and style checks 28 | # skip_install = True 29 | # deps = pre-commit 30 | # passenv = 31 | # HOMEPATH 32 | # PROGRAMDATA 33 | # SETUPTOOLS_* 34 | # commands = 35 | # pre-commit run --all-files {posargs:--show-diff-on-failure} 36 | 37 | 38 | [testenv:{build,clean}] 39 | description = 40 | build: Build the package in isolation according to PEP517, see https://github.com/pypa/build 41 | clean: Remove old distribution files and temporary build artifacts (./build and ./dist) 42 | # https://setuptools.pypa.io/en/stable/build_meta.html#how-to-use-it 43 | skip_install = True 44 | changedir = {toxinidir} 45 | deps = 46 | build: build[virtualenv] 47 | passenv = 48 | SETUPTOOLS_* 49 | commands = 50 | clean: python -c 'import shutil; [shutil.rmtree(p, True) for p in ("build", "dist", "docs/_build")]' 51 | clean: python -c 'import pathlib, shutil; [shutil.rmtree(p, True) for p in 
pathlib.Path("src").glob("*.egg-info")]' 52 | build: python -m build {posargs} 53 | 54 | 55 | [testenv:{docs,doctests,linkcheck}] 56 | description = 57 | docs: Invoke sphinx-build to build the docs 58 | doctests: Invoke sphinx-build to run doctests 59 | linkcheck: Check for broken links in the documentation 60 | passenv = 61 | SETUPTOOLS_* 62 | setenv = 63 | DOCSDIR = {toxinidir}/docs 64 | BUILDDIR = {toxinidir}/docs/_build 65 | docs: BUILD = html 66 | doctests: BUILD = doctest 67 | linkcheck: BUILD = linkcheck 68 | deps = 69 | -r {toxinidir}/docs/requirements.txt 70 | # ^ requirements.txt shared with Read The Docs 71 | commands = 72 | sphinx-build --color -b {env:BUILD} -d "{env:BUILDDIR}/doctrees" "{env:DOCSDIR}" "{env:BUILDDIR}/{env:BUILD}" {posargs} 73 | 74 | 75 | [testenv:publish] 76 | description = 77 | Publish the package you have been developing to a package index server. 78 | By default, it uses testpypi. If you really want to publish your package 79 | to be publicly accessible in PyPI, use the `-- --repository pypi` option. 80 | skip_install = True 81 | changedir = {toxinidir} 82 | passenv = 83 | # See: https://twine.readthedocs.io/en/latest/ 84 | TWINE_USERNAME 85 | TWINE_PASSWORD 86 | TWINE_REPOSITORY 87 | deps = twine 88 | commands = 89 | python -m twine check dist/* 90 | python -m twine upload {posargs:--repository {env:TWINE_REPOSITORY:testpypi}} dist/* 91 | --------------------------------------------------------------------------------