├── .bandit.yml ├── .cookietemple.yml ├── .darglint ├── .editorconfig ├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── general_question.md ├── dependabot.yml ├── labels.yml ├── pull_request_template.md ├── release-drafter.yml └── workflows │ ├── build_package.yml │ ├── labeler.yml │ ├── main_master_branch_protection.yml │ ├── publish_docs.yml │ ├── publish_package.yml │ ├── release-drafter.yml │ ├── run_cookietemple_lint.yml │ ├── run_tests.yml │ └── sync_project.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierignore ├── .readthedocs.yml ├── CODE_OF_CONDUCT.rst ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── README.rst ├── batchglm ├── __init__.py ├── __main__.py ├── _version.py ├── api │ └── __init__.py ├── log_cfg.py ├── models │ ├── __init__.py │ ├── base_glm │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ ├── glm_beta │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ ├── glm_nb │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ ├── glm_norm │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ └── glm_poisson │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py ├── pkg_constants.py ├── py.typed ├── train │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── estimator.py │ │ └── model_container.py │ └── numpy │ │ ├── __init__.py │ │ ├── base_glm │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── external.py │ │ ├── model_container.py │ │ └── training_strategies.py │ │ ├── glm_nb │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── external.py │ │ └── model_container.py │ │ ├── glm_norm │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── external.py │ │ ├── model_container.py │ │ └── utils.py │ │ └── glm_poisson │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── exceptions.py │ │ ├── external.py │ │ └── model_container.py └── utils │ ├── __init__.py │ ├── data.py │ ├── input.py │ ├── linalg.py │ └── plotting.py ├── codecov.yml ├── cookietemple.cfg ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ └── custom_cookietemple.css ├── api │ ├── .gitignore │ └── index.rst ├── authors.rst ├── code_of_conduct.rst ├── conf.py ├── contributing.rst ├── index.rst ├── installation.rst ├── make.bat ├── readme.rst ├── reference.rst ├── references.rst ├── requirements.txt ├── tutorials.rst └── usage.rst ├── makefiles ├── Linux.mk └── Windows.mk ├── noxfile.py ├── poetry.lock ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── numpy │ ├── test_accuracy.py │ ├── test_accuracy_extreme_values.py │ └── utils.py ├── run_data_utils_test.py ├── test_main.py └── test_types_dmat.py └── versioneer.py /.bandit.yml: -------------------------------------------------------------------------------- 1 | # (optional) list included tests here: 2 | tests: [] 3 | 4 | # (optional) list skipped tests here: 5 | skips: ["B403", "B404", "B603", "B607"] 6 | -------------------------------------------------------------------------------- /.cookietemple.yml: -------------------------------------------------------------------------------- 1 | cookietemple_version: '1.3.11 # <>' 2 | domain: cli 3 | language: python 4 | project_slug: batchglm 5 | project_slug_no_hyphen: batchglm 6 | template_version: '2.0.2 # <>' 7 | template_handle: cli-python 8 | github_username: theislab 9 | creator_github_username: picciama 10 | is_github_repo: true 11 | is_repo_private: false 
12 | is_github_orga: true 13 | github_orga: theislab 14 | full_name: Mario Picciani 15 | email: mario.picciani@tum.de 16 | project_name: batchglm 17 | project_short_description: batchglm. A cookietemple based . 18 | version: 0.7.4 19 | license: BSD 20 | -------------------------------------------------------------------------------- /.darglint: -------------------------------------------------------------------------------- 1 | [darglint] 2 | strictness = short 3 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = B,B9,C,E,F,N,RST,S,W 3 | ignore = C901,E203,F401,RST201,RST301,S101,W503 4 | max-line-length = 120 5 | max-complexity = 10 6 | docstring-convention = all 7 | docstring_style=sphinx 8 | per-file-ignores = 9 | tests/*:S101 10 | versioneer.py:B,B9,C,N,RST202,S404,S603,W605 11 | batchglm/_version.py:B,C,N,S404,S603 12 | batchglm/utils/linalg.py:N803 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | batchglm/_version.py export-subst 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report to help us improve 4 | title: "Bug Summary" 5 | labels: "bug" 6 | assignees: "" 7 | --- 8 | 9 | **Describe the bug** 10 | 11 | 12 | 13 | **To Reproduce** 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 2. ... 19 | 3. ... 20 | 21 | **Expected behavior** 22 | 23 | 24 | 25 | **System [please complete the following information]:** 26 | 27 | - OS: e.g. [Ubuntu 18.04] 28 | - Language Version: [e.g. Python 3.8] 29 | - Virtual environment: [e.g. Conda] 30 | 31 | **Additional context** 32 | 33 | 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a new feature 4 | title: "Feature Request Summary" 5 | labels: "enhancement" 6 | assignees: "" 7 | --- 8 | 9 | **Is your feature request related to a problem? 
Please describe.** 10 | 11 | 12 | 13 | **Describe the solution you would like** 14 | 15 | 16 | 17 | **Additional context** 18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General question 3 | about: Ask a question about anything related to this project 4 | title: "Question" 5 | labels: "question" 6 | assignees: "" 7 | --- 8 | 9 | **Question** 10 | 11 | 13 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 3 8 | target-branch: development 9 | labels: 10 | - DEPENDABOT 11 | commit-message: 12 | prefix: "[DEPENDABOT]" 13 | 14 | - package-ecosystem: pip 15 | directory: "/.github/workflows" 16 | schedule: 17 | interval: weekly 18 | open-pull-requests-limit: 3 19 | target-branch: development 20 | labels: 21 | - DEPENDABOT 22 | commit-message: 23 | prefix: "[DEPENDABOT]" 24 | 25 | - package-ecosystem: pip 26 | directory: "/docs" 27 | schedule: 28 | interval: weekly 29 | open-pull-requests-limit: 3 30 | target-branch: development 31 | labels: 32 | - DEPENDABOT 33 | commit-message: 34 | prefix: "[DEPENDABOT]" 35 | 36 | - package-ecosystem: pip 37 | directory: "/" 38 | schedule: 39 | interval: weekly 40 | open-pull-requests-limit: 3 41 | target-branch: development 42 | labels: 43 | - DEPENDABOT 44 | commit-message: 45 | prefix: "[DEPENDABOT]" 46 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Label names are important as they are used by Release Drafter to decide 3 | # where to record them in the changelog or whether to skip them. 4 | # 5 | # The repository labels will be automatically configured using this file and 6 | # the GitHub Action https://github.com/marketplace/actions/github-labeler.
7 | - name: breaking 8 | description: Breaking Changes 9 | color: bfd4f2 10 | - name: bug 11 | description: Something isn't working 12 | color: d73a4a 13 | - name: build 14 | description: Build System and Dependencies 15 | color: bfdadc 16 | - name: ci 17 | description: Continuous Integration 18 | color: 4a97d6 19 | - name: dependencies 20 | description: Pull requests that update a dependency file 21 | color: 0366d6 22 | - name: documentation 23 | description: Improvements or additions to documentation 24 | color: 0075ca 25 | - name: duplicate 26 | description: This issue or pull request already exists 27 | color: cfd3d7 28 | - name: enhancement 29 | description: New feature or request 30 | color: a2eeef 31 | - name: github_actions 32 | description: Pull requests that update Github_actions code 33 | color: "000000" 34 | - name: good first issue 35 | description: Good for newcomers 36 | color: 7057ff 37 | - name: help wanted 38 | description: Extra attention is needed 39 | color: 008672 40 | - name: invalid 41 | description: This doesn't seem right 42 | color: e4e669 43 | - name: performance 44 | description: Performance 45 | color: "016175" 46 | - name: python 47 | description: Pull requests that update Python code 48 | color: 2b67c6 49 | - name: question 50 | description: Further information is requested 51 | color: d876e3 52 | - name: refactoring 53 | description: Refactoring 54 | color: ef67c4 55 | - name: removal 56 | description: Removals and Deprecations 57 | color: 9ae7ea 58 | - name: style 59 | description: Style 60 | color: c120e5 61 | - name: testing 62 | description: Testing 63 | color: b1fc6f 64 | - name: wontfix 65 | description: This will not be worked on 66 | color: ffffff 67 | - name: skip-changelog 68 | description: Changes that should be omitted from the release notes 69 | color: ededed 70 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | **PR Checklist** 4 | 5 | 6 | 7 | - [ ] This comment contains a description of changes (with reason) 8 | - [ ] Referenced issue is linked 9 | - [ ] If you've fixed a bug or added code that should be tested, add tests! 
10 | - [ ] Documentation in `docs` is updated 11 | 12 | **Description of changes** 13 | 14 | 15 | 16 | **Technical details** 17 | 18 | 19 | 20 | **Additional context** 21 | 22 | 23 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: "0.7.4 🌈" # <> 2 | tag-template: 0.7.4 # <> 3 | exclude-labels: 4 | - "skip-changelog" 5 | 6 | categories: 7 | - title: "🚀 Features" 8 | labels: 9 | - feature 10 | - enhancement 11 | - title: "🐛 Bug Fixes" 12 | labels: 13 | - fix 14 | - bugfix 15 | - bug 16 | - title: "🧰 Maintenance" 17 | label: chore 18 | - title: ":package: Dependencies" 19 | labels: 20 | - dependencies 21 | - build 22 | - dependabot 23 | - DEPENDABOT 24 | version-resolver: 25 | major: 26 | labels: 27 | - major 28 | minor: 29 | labels: 30 | - minor 31 | patch: 32 | labels: 33 | - patch 34 | default: patch 35 | autolabeler: 36 | - label: chore 37 | files: 38 | - "*.md" 39 | branch: 40 | - '/docs{0,1}\/.+/' 41 | - label: bug 42 | branch: 43 | - /fix\/.+/ 44 | title: 45 | - /fix/i 46 | - label: enhancement 47 | branch: 48 | - /feature\/.+/ 49 | body: 50 | - "/JIRA-[0-9]{1,4}/" 51 | template: | 52 | ## Changes 53 | 54 | $CHANGES 55 | -------------------------------------------------------------------------------- /.github/workflows/build_package.yml: -------------------------------------------------------------------------------- 1 | name: Build batchglm Package 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" 9 | strategy: 10 | matrix: 11 | os: [macos-latest, ubuntu-latest, windows-latest] 12 | python: [3.8, 3.9] 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | name: Check out source-code repository 17 | 18 | - name: Setup Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: ${{ matrix.python }} 22 | 23 | - name: Install Poetry 24 | run: | 25 | pip install poetry 26 | poetry --version 27 | 28 | - name: Build package 29 | run: poetry build --ansi 30 | 31 | - name: Install required twine packaging dependencies 32 | run: pip install setuptools wheel twine 33 | 34 | - name: Check twine package 35 | run: twine check dist/* 36 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: Labeler 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | 9 | jobs: 10 | labeler: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out the repository 14 | uses: actions/checkout@v2.3.3 15 | 16 | - name: Run Labeler 17 | uses: crazy-max/ghaction-github-labeler@v3.1.1 18 | with: 19 | skip-delete: true 20 | -------------------------------------------------------------------------------- /.github/workflows/main_master_branch_protection.yml: -------------------------------------------------------------------------------- 1 | name: PR to master branch from patch/release branch only 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | - main 8 | 9 | jobs: 10 | check_target: 11 | runs-on: ubuntu-latest 12 | name: Check Target branch 13 | steps: 14 | # PRs to the repository master branch are only ok if coming from any patch or release branch 15 | - name: Check PRs 16 | run: | 17 | { [[ $GITHUB_HEAD_REF = 
*"release"* ]]; } || [[ $GITHUB_HEAD_REF == *"patch"* ]] 18 | 19 | # If the above check failed, post a comment on the PR explaining the failure 20 | # NOTE - this may not work if the PR is coming from a fork, due to limitations in GitHub actions secrets 21 | - name: Post PR comment 22 | if: failure() 23 | uses: mshick/add-pr-comment@v1 24 | with: 25 | message: | 26 | Hi @${{ github.event.pull_request.user.login }}, 27 | 28 | It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` or `main` branch. 29 | The `master`/`main` branch should always contain code from the latest release. 30 | Because of this, PRs to `master`/`main` are only allowed if they come from any ${{github.event.pull_request.head.repo.full_name}} `release` or `patch` branch. 31 | 32 | You do not need to close this PR, you can change the target branch to `development` by clicking the _"Edit"_ button at the top of this page. 33 | 34 | Thanks again for your contribution! 35 | repo-token: ${{ secrets.GITHUB_TOKEN }} 36 | allow-repeats: false 37 | 38 | check_version: 39 | name: No SNAPSHOT version on master branch 40 | runs-on: ubuntu-latest 41 | steps: 42 | - name: Set up Python 43 | uses: actions/setup-python@v2 44 | with: 45 | python-version: "3.8" 46 | # PRs to the repository master branch are only ok if coming from any patch or release branch 47 | - name: Install mlf-core 48 | run: pip install mlf-core 49 | 50 | - name: Check project version 51 | run: | 52 | PROJECTVERSION=$(mlf-core bump-version --project-version . | tail -n1) 53 | echo $PROJECTVERSION; 54 | if [[ $PROJECTVERSION == *"SNAPSHOT"* ]];then 55 | exit -1 56 | else 57 | exit 0 58 | fi 59 | 60 | # If the above check failed, post a comment on the PR explaining the failure 61 | # NOTE - this may not work if the PR is coming from a fork, due to limitations in GitHub actions secrets 62 | - name: Post PR comment 63 | if: failure() 64 | uses: mshick/add-pr-comment@v1 65 | with: 66 | message: | 67 | Hi @${{ github.event.pull_request.user.login }}, 68 | 69 | It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master`/`main` branch. 70 | A version check determined that you are using a SNAPSHOT version. 71 | The `master`/`main` branch should never have any SNAPSHOT versions, since only fully stable code should be on the `master`/`main` branch. 
72 | repo-token: ${{ secrets.GITHUB_TOKEN }} 73 | allow-repeats: false 74 | -------------------------------------------------------------------------------- /.github/workflows/publish_docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Documentation 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | name: Check out source-code repository 12 | 13 | - name: Setup Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.8 17 | 18 | - name: Install pip 19 | run: | 20 | python -m pip install --upgrade pip 21 | 22 | - name: Install doc dependencies 23 | run: | 24 | pip install -r docs/requirements.txt 25 | 26 | - name: Build docs 27 | run: | 28 | cd docs 29 | make html 30 | 31 | - name: Deploy 32 | if: ${{ github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'}} 33 | uses: peaceiris/actions-gh-pages@v3 34 | with: 35 | github_token: ${{ secrets.GITHUB_TOKEN }} 36 | publish_dir: ./docs/_build/html 37 | -------------------------------------------------------------------------------- /.github/workflows/publish_package.yml: -------------------------------------------------------------------------------- 1 | name: Publish batchglm to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release: 9 | name: Release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out the repository 13 | uses: actions/checkout@v2.3.4 14 | with: 15 | fetch-depth: 2 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v2.1.4 19 | with: 20 | python-version: "3.9" 21 | 22 | - name: Install Poetry 23 | run: | 24 | pip install poetry 25 | poetry --version 26 | 27 | - name: Build package 28 | run: | 29 | poetry build --ansi 30 | 31 | - name: Publish package on PyPI 32 | uses: pypa/gh-action-pypi-publish@v1.4.2 33 | with: 34 | # TODO COOKIETEMPLE: Configure your PyPI Token to enable automatic deployment to PyPi on releases 35 | # https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets 36 | user: __token__ 37 | password: ${{ secrets.PYPI_TOKEN }} 38 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | on: 3 | push: 4 | branches: 5 | - development 6 | pull_request: 7 | branches: 8 | - development 9 | types: 10 | - opened 11 | - reopened 12 | - synchronize 13 | jobs: 14 | update_release_draft: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: release-drafter/release-drafter@v5 18 | env: 19 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 20 | -------------------------------------------------------------------------------- /.github/workflows/run_cookietemple_lint.yml: -------------------------------------------------------------------------------- 1 | name: cookietemple lint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | run: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | name: Check out source-code repository 12 | 13 | - name: Setup Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.9 17 | 18 | - name: Install cookietemple 19 | run: pip install cookietemple==1.3.11 20 | 21 | - name: Run cookietemple lint 22 | run: cookietemple lint . 
23 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run batchglm Tests 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | tests: 9 | name: ${{ matrix.session }} ${{ matrix.python-version }} / ${{ matrix.os }} 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | - { 16 | python-version: 3.8, 17 | os: ubuntu-latest, 18 | session: "pre-commit", 19 | } 20 | - { 21 | python-version: 3.8, 22 | os: ubuntu-latest, 23 | session: "safety", 24 | } 25 | - { 26 | python-version: 3.8, 27 | os: ubuntu-latest, 28 | session: "mypy", 29 | } 30 | - { 31 | python-version: 3.8, 32 | os: ubuntu-latest, 33 | session: "tests", 34 | } 35 | - { 36 | python-version: 3.8, 37 | os: windows-latest, 38 | session: "tests", 39 | } 40 | - { 41 | python-version: 3.8, 42 | os: macos-latest, 43 | session: "tests", 44 | } 45 | - { 46 | python-version: 3.8, 47 | os: ubuntu-latest, 48 | session: "typeguard", 49 | } 50 | - { 51 | python-version: 3.8, 52 | os: ubuntu-latest, 53 | session: "xdoctest", 54 | } 55 | - { 56 | python-version: 3.8, 57 | os: ubuntu-latest, 58 | session: "docs-build", 59 | } 60 | 61 | env: 62 | NOXSESSION: ${{ matrix.session }} 63 | 64 | steps: 65 | - name: Check out the repository 66 | uses: actions/checkout@v2.3.4 67 | 68 | - name: Set up Python ${{ matrix.python-version }} 69 | uses: actions/setup-python@v2.2.2 70 | with: 71 | python-version: ${{ matrix.python-version }} 72 | 73 | - name: Install Poetry 74 | run: | 75 | pipx install poetry 76 | poetry --version 77 | 78 | - name: Install nox nox-poetry rich 79 | run: | 80 | pipx install nox 81 | pipx inject nox nox-poetry 82 | pipx inject nox rich 83 | nox --version 84 | 85 | - name: Compute pre-commit cache key 86 | if: matrix.session == 'pre-commit' 87 | id: pre-commit-cache 88 | shell: python 89 | run: | 90 | import hashlib 91 | import sys 92 | 93 | python = "py{}.{}".format(*sys.version_info[:2]) 94 | payload = sys.version.encode() + sys.executable.encode() 95 | digest = hashlib.sha256(payload).hexdigest() 96 | result = "${{ runner.os }}-{}-{}-pre-commit".format(python, digest[:8]) 97 | 98 | print("::set-output name=result::{}".format(result)) 99 | 100 | - name: Restore pre-commit cache 101 | uses: actions/cache@v2.1.6 102 | if: matrix.session == 'pre-commit' 103 | with: 104 | path: ~/.cache/pre-commit 105 | key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }} 106 | restore-keys: | 107 | ${{ steps.pre-commit-cache.outputs.result }}- 108 | 109 | - name: Run Nox 110 | run: nox --force-color --python=${{ matrix.python-version }} 111 | 112 | - name: Upload coverage data 113 | if: always() && matrix.session == 'tests' 114 | uses: "actions/upload-artifact@v2.2.3" 115 | with: 116 | name: coverage-data 117 | path: ".coverage.*" 118 | 119 | - name: Upload documentation 120 | if: matrix.session == 'docs-build' 121 | uses: actions/upload-artifact@v2.2.4 122 | with: 123 | name: docs 124 | path: docs/_build 125 | 126 | coverage: 127 | runs-on: ubuntu-latest 128 | needs: tests 129 | steps: 130 | - name: Check out the repository 131 | uses: actions/checkout@v2.3.4 132 | 133 | - name: Set up Python 3.8 134 | uses: actions/setup-python@v2.2.2 135 | with: 136 | python-version: 3.8 137 | 138 | - name: Install Poetry 139 | run: | 140 | pipx install poetry 141 | poetry --version 142 | 143 | - name: Install nox 
nox-poetry rich 144 | run: | 145 | pipx install nox 146 | pipx inject nox nox-poetry 147 | pipx inject nox rich 148 | nox --version 149 | 150 | - name: Download coverage data 151 | uses: actions/download-artifact@v2.0.10 152 | with: 153 | name: coverage-data 154 | 155 | - name: Combine coverage data and display human readable report 156 | run: nox --force-color --session=coverage 157 | 158 | - name: Create coverage report 159 | run: nox --force-color --session=coverage -- xml -i 160 | 161 | - name: Upload coverage report 162 | uses: codecov/codecov-action@v2.1.0 163 | -------------------------------------------------------------------------------- /.github/workflows/sync_project.yml: -------------------------------------------------------------------------------- 1 | name: cookietemple sync 2 | 3 | on: 4 | schedule: 5 | - cron: "0 1 * * *" # 1 am UTC 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Setup Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: 3.8 16 | 17 | - name: Install cookietemple 18 | run: pip install cookietemple 19 | 20 | - uses: actions/checkout@v2 21 | with: 22 | fetch-depth: 0 23 | token: "${{ secrets.CT_SYNC_TOKEN }}" 24 | name: Check out source-code repository 25 | 26 | - uses: oleksiyrudenko/gha-git-credentials@v2.1 27 | with: 28 | name: "picciama" 29 | email: "mario.picciani@tum.de" 30 | actor: "picciama" 31 | token: "${{ secrets.CT_SYNC_TOKEN}}" 32 | 33 | - name: Sync project 34 | run: cookietemple sync . ${{ secrets.CT_SYNC_TOKEN }} zethson 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cache 2 | data 3 | config.ini 4 | .metadata 5 | .Rhistory 6 | playground/* 7 | resources/* 8 | **/__pycache__ 9 | **/.DS_Store 10 | #**/*.ipynb 11 | tutorials 12 | 13 | !**/.gitignore 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | pip-wheel-metadata/ 38 | share/python-wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | *.py,cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | db.sqlite3-journal 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | .pytype/ 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # Jetbrains IDE 149 | .idea/ 150 | 151 | # Coala 152 | *.orig 153 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: black 5 | name: black 6 | entry: black 7 | language: system 8 | types: [python] 9 | require_serial: true 10 | - id: check-added-large-files 11 | name: Check for added large files 12 | entry: check-added-large-files 13 | language: system 14 | - id: check-toml 15 | name: Check Toml 16 | entry: check-toml 17 | language: system 18 | types: [toml] 19 | - id: check-yaml 20 | name: Check Yaml 21 | entry: check-yaml 22 | language: system 23 | types: [yaml] 24 | - id: end-of-file-fixer 25 | name: Fix End of Files 26 | entry: end-of-file-fixer 27 | language: system 28 | types: [text] 29 | stages: [commit, push, manual] 30 | exclude: docs/ 31 | - id: flake8 32 | name: flake8 33 | entry: flake8 34 | language: system 35 | types: [python] 36 | require_serial: true 37 | - id: trailing-whitespace 38 | name: Trim Trailing Whitespace 39 | entry: trailing-whitespace-fixer 40 | language: system 41 | types: [text] 42 | stages: [commit, push, manual] 43 | - repo: https://github.com/pre-commit/mirrors-prettier 44 | rev: v2.7.1 45 | hooks: 46 | - id: prettier 47 | - repo: https://github.com/pycqa/isort 48 | rev: 5.10.1 49 | hooks: 50 | - id: isort 51 | name: isort (python) 52 | args: ["--profile", "black"] 53 | - id: isort 54 | name: isort (cython) 55 | types: [cython] 56 | - id: isort 57 | name: isort (pyi) 58 | types: [pyi] 59 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | .cookietemple.yml 2 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | # Build documentation in the docs/ directory with Sphinx 7 | sphinx: 8 | configuration: docs/conf.py 9 | 10 | # Build documentation with MkDocs 11 | #mkdocs: 12 | # configuration: mkdocs.yml 13 | 14 | # Optionally build your docs in additional formats such as PDF and ePub 15 | formats: all 16 | 17 | # Optionally set the version of Python and requirements required to build your docs 18 | python: 19 | version: 3.8 20 | install: 21 | - requirements: docs/requirements.txt 
22 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | Our Pledge 5 | ---------- 6 | 7 | In the interest of fostering an open and welcoming environment, we as 8 | contributors and maintainers pledge to making participation in our 9 | project and our community a harassment-free experience for everyone, 10 | regardless of age, body size, disability, ethnicity, gender identity and 11 | expression, level of experience, nationality, personal appearance, race, 12 | religion, or sexual identity and orientation. 13 | 14 | Our Standards 15 | ------------- 16 | 17 | Examples of behavior that contributes to creating a positive environment 18 | include: 19 | 20 | - Using welcoming and inclusive language 21 | - Being respectful of differing viewpoints and experiences 22 | - Gracefully accepting constructive criticism 23 | - Focusing on what is best for the community 24 | - Showing empathy towards other community members 25 | 26 | Examples of unacceptable behavior by participants include: 27 | 28 | - The use of sexualized language or imagery and unwelcome sexual 29 | attention or advances 30 | - Trolling, insulting/derogatory comments, and personal or political 31 | attacks 32 | - Public or private harassment 33 | - Publishing others’ private information, such as a physical or 34 | electronic address, without explicit permission 35 | - Other conduct which could reasonably be considered inappropriate in a 36 | professional setting 37 | 38 | Our Responsibilities 39 | -------------------- 40 | 41 | Project maintainers are responsible for clarifying the standards of 42 | acceptable behavior and are expected to take appropriate and fair 43 | corrective action in response to any instances of unacceptable behavior. 44 | 45 | Project maintainers have the right and responsibility to remove, edit, 46 | or reject comments, commits, code, wiki edits, issues, and other 47 | contributions that are not aligned to this Code of Conduct, or to ban 48 | temporarily or permanently any contributor for other behaviors that they 49 | deem inappropriate, threatening, offensive, or harmful. 50 | 51 | Scope 52 | ----- 53 | 54 | This Code of Conduct applies both within project spaces and in public 55 | spaces when an individual is representing the project or its community. 56 | Examples of representing a project or community include using an 57 | official project e-mail address, posting via an official social media 58 | account, or acting as an appointed representative at an online or 59 | offline event. Representation of a project may be further defined and 60 | clarified by project maintainers. 61 | 62 | Enforcement 63 | ----------- 64 | 65 | Instances of abusive, harassing, or otherwise unacceptable behavior may 66 | be reported by opening an issue. The project team 67 | will review and investigate all complaints, and will respond in a way 68 | that it deems appropriate to the circumstances. The project team is 69 | obligated to maintain confidentiality with regard to the reporter of an 70 | incident. Further details of specific enforcement policies may be posted 71 | separately. 72 | 73 | Project maintainers who do not follow or enforce the Code of Conduct in 74 | good faith may face temporary or permanent repercussions as determined 75 | by other members of the project’s leadership. 
76 | 77 | Attribution 78 | ------------------- 79 | 80 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, 81 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.1-alpine 2 | 3 | # A few utilities needed to be able to install C-based libraries such as numpy 4 | RUN apk update 5 | RUN apk add make automake gcc g++ git 6 | 7 | RUN pip install batchglm 8 | 9 | CMD batchglm 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, David S. Fischer, Florian R. Hölzlwimmer. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include batchglm/_version.py 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(OS),Windows_NT) 2 | include makefiles/Windows.mk 3 | else 4 | include makefiles/Linux.mk 5 | endif 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fast and scalable fitting of over-determined generalized-linear models (GLMs) 2 | 3 | batchglm was developed in the context of [diffxpy](https://github.com/theislab/diffxpy) to allow fast model fitting for differential expression analysis of single-cell RNA-seq data. However, one can use batchglm or its concepts in other scenarios where over-determined GLMs are encountered.
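At its core, fitting one over-determined GLM per feature reduces to many regressions that share a single design matrix, so all features can be fit together. As a conceptual sketch only (plain `numpy`/`patsy`, not batchglm's own API; all names and sizes below are illustrative), the normal/identity special case looks like this:

```python
import numpy as np
import patsy

# 200 observations x 1000 features (e.g. genes), one shared design matrix.
n_obs, n_features = 200, 1000
y = np.random.rand(n_obs, n_features)
design = patsy.dmatrix("~ 1 + condition", {"condition": np.repeat(["a", "b"], n_obs // 2)})

# A single least-squares solve fits the coefficients of all 1000 models at once.
coef, *_ = np.linalg.lstsq(np.asarray(design), y, rcond=None)
print(coef.shape)  # (2, 1000): one coefficient vector per feature
```

batchglm generalizes this idea to non-normal noise models such as the negative binomial, beta, Poisson, and normal distributions (see `batchglm.models`).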
4 | To install the requirements: 5 | ``` 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | To run unit tests: 10 | 11 | ``` 12 | pip install -e . 13 | python -m unittest 14 | ``` 15 | 16 | 37 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | batchglm 2 | =========================== 3 | 4 | |PyPI| |Python Version| |License| |Read the Docs| |Build| |Tests| |Codecov| |pre-commit| |Black| 5 | 6 | .. |PyPI| image:: https://img.shields.io/pypi/v/batchglm.svg 7 | :target: https://pypi.org/project/batchglm/ 8 | :alt: PyPI 9 | .. |Python Version| image:: https://img.shields.io/pypi/pyversions/batchglm 10 | :target: https://pypi.org/project/batchglm 11 | :alt: Python Version 12 | .. |License| image:: https://img.shields.io/github/license/theislab/batchglm 13 | :target: https://opensource.org/licenses/BSD 14 | :alt: License 15 | .. |Read the Docs| image:: https://img.shields.io/readthedocs/batchglm/latest.svg?label=Read%20the%20Docs 16 | :target: https://batchglm.readthedocs.io/ 17 | :alt: Read the documentation at https://batchglm.readthedocs.io/ 18 | .. |Build| image:: https://github.com/theislab/batchglm/workflows/Build%20batchglm%20Package/badge.svg 19 | :target: https://github.com/theislab/batchglm/actions?workflow=Package 20 | :alt: Build Package Status 21 | .. |Tests| image:: https://github.com/theislab/batchglm/workflows/Run%20batchglm%20Tests/badge.svg 22 | :target: https://github.com/theislab/batchglm/actions?workflow=Tests 23 | :alt: Run Tests Status 24 | .. |Codecov| image:: https://codecov.io/gh/theislab/batchglm/branch/master/graph/badge.svg 25 | :target: https://codecov.io/gh/theislab/batchglm 26 | :alt: Codecov 27 | .. |pre-commit| image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white 28 | :target: https://github.com/pre-commit/pre-commit 29 | :alt: pre-commit 30 | .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg 31 | :target: https://github.com/psf/black 32 | :alt: Black 33 | 34 | 35 | Features 36 | -------- 37 | 38 | - Fit many GLMs (i.e. a batch!) all at once using `numpy` (coming soon: `tensorflow2` or `statsmodels`) with a simple API 39 | - Integrates with and provides utilities for working with familiar libraries like `patsy` and `dask`. 40 | 41 | Installation 42 | ------------ 43 | 44 | You can install *batchglm* via pip_ from PyPI_: 45 | 46 | .. code:: console 47 | 48 | $ pip install batchglm 49 | 50 | 51 | Usage 52 | ----- 53 | 54 | Please see the API documentation for details or the Jupyter notebook tutorials (TODO: need notebooks - separate docs?) 55 | 56 | 57 | Credits 58 | ------- 59 | 60 | This package was created with cookietemple_ using Cookiecutter_ based on Hypermodern_Python_Cookiecutter_. 61 | 62 | .. _cookietemple: https://cookietemple.com 63 | .. _Cookiecutter: https://github.com/audreyr/cookiecutter 64 | .. _PyPI: https://pypi.org/ 65 | .. _Hypermodern_Python_Cookiecutter: https://github.com/cjolowicz/cookiecutter-hypermodern-python 66 | .. _pip: https://pip.pypa.io/ 67 | .. _Usage: https://batchglm.readthedocs.io/en/latest/usage.html -------------------------------------------------------------------------------- /batchglm/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .
import models, pkg_constants, train, utils 4 | 5 | # from ._version import get_versions 6 | from .log_cfg import logger, setup_logging, unconfigure_logging 7 | 8 | # __version__ = _version.get_versions()["version"] 9 | # del get_versions 10 | 11 | # we need this for the sparse package, see https://github.com/pydata/sparse/issues/10 12 | os.environ["SPARSE_AUTO_DENSIFY"] = "1" 13 | -------------------------------------------------------------------------------- /batchglm/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Command-line interface.""" 3 | import click 4 | from rich import traceback 5 | 6 | 7 | @click.command() 8 | @click.version_option(version="0.7.4", message=click.style("batchglm Version: 0.7.4")) 9 | def main() -> None: 10 | """batchglm.""" 11 | 12 | 13 | if __name__ == "__main__": 14 | traceback.install() 15 | main(prog_name="batchglm") # pragma: no cover 16 | -------------------------------------------------------------------------------- /batchglm/api/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .. import models, pkg_constants, train, utils 4 | 5 | # from .._version import get_versions 6 | from ..log_cfg import logger, setup_logging, unconfigure_logging 7 | 8 | # __version__ = get_versions()["version"] 9 | # del get_versions 10 | 11 | # we need this for the sparse package, see https://github.com/pydata/sparse/issues/10 12 | os.environ["SPARSE_AUTO_DENSIFY"] = "1" 13 | -------------------------------------------------------------------------------- /batchglm/log_cfg.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | logger = logging.getLogger(".".join(__name__.split(".")[:-1])) 5 | 6 | _is_interactive = bool(getattr(sys, "ps1", sys.flags.interactive)) 7 | 8 | 9 | def unconfigure_logging(): 10 | if logger.hasHandlers(): 11 | for handler in logger.handlers: 12 | logger.removeHandler(handler) 13 | 14 | logger.setLevel(logging.NOTSET) 15 | 16 | 17 | def setup_logging(verbosity="WARNING", stream=None, format=logging.BASIC_FORMAT): 18 | unconfigure_logging() 19 | 20 | if isinstance(verbosity, str): 21 | verbosity = getattr(logging, verbosity) 22 | 23 | logger.setLevel(verbosity) 24 | 25 | if stream is not None: 26 | if isinstance(stream, str): 27 | if stream.lower() == "stdout": 28 | stream = sys.stdout 29 | elif stream.lower() == "stderr": 30 | stream = sys.stderr 31 | else: 32 | raise ValueError("Unknown stream %s" % stream) 33 | 34 | handler = logging.StreamHandler(stream) 35 | handler.setFormatter(logging.Formatter(format, None)) 36 | logger.addHandler(handler) 37 | 38 | 39 | # If we are in an interactive environment (like Jupyter), set loglevel to INFO and pipe the output to stdout. 40 | if _is_interactive: 41 | setup_logging(logging.INFO) 42 | else: 43 | setup_logging(logging.WARNING) 44 | -------------------------------------------------------------------------------- /batchglm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import base_glm, glm_beta, glm_nb, glm_norm, glm_poisson 2 | -------------------------------------------------------------------------------- /batchglm/models/base_glm/__init__.py: -------------------------------------------------------------------------------- 1 | # from .estimator import _EstimatorGLM 2 | from ...utils.input import InputDataGLM 3 | from .model import ModelGLM 4 | from .utils import closedform_glm_mean, closedform_glm_scale 5 | -------------------------------------------------------------------------------- /batchglm/models/base_glm/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.utils.linalg import groupwise_solve_lm 4 | -------------------------------------------------------------------------------- /batchglm/models/base_glm/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from typing import Callable, List, Optional, Tuple, Union 4 | 5 | import dask.array 6 | import numpy as np 7 | import pandas as pd 8 | import patsy 9 | import scipy.sparse 10 | import sparse 11 | 12 | from .external import groupwise_solve_lm 13 | 14 | logger = logging.getLogger("batchglm") 15 | 16 | 17 | def densify(arr): 18 | if isinstance(arr, dask.array.core.Array): 19 | arr = arr.compute() 20 | if isinstance(arr, sparse.COO) or isinstance(arr, scipy.sparse.csr_matrix): 21 | return arr.todense() 22 | else: 23 | return arr 24 | 25 | 26 | def generate_sample_description( 27 | num_observations: int, 28 | num_conditions: int, 29 | num_batches: int, 30 | intercept_scale: bool, 31 | shuffle_assignments: bool, 32 | ) -> Tuple[patsy.DesignMatrix, patsy.DesignMatrix, pd.DataFrame]: 33 | """Build a sample description. 34 | 35 | :param num_observations: Number of observations to simulate. 36 | :param num_conditions: number of conditions; will be repeated like [1,2,3,1,2,3] 37 | :param num_batches: number of batches; will be repeated like [1,1,2,2,3,3] 38 | :param intercept_scale: If true, returns a single-coefficient design matrix (formula = "~1"). 39 | If false, returns a design matrix identical to the loc model. 40 | :param shuffle_assignments: If true, shuffle the assignments in the xarray. 41 | UNSUPPORTED: Must be removed as it is dysfunctional!!!
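A minimal usage sketch (argument values are illustrative)::

    design_loc, design_scale, sample_description = generate_sample_description(
        num_observations=6,
        num_conditions=2,
        num_batches=3,
        intercept_scale=True,
        shuffle_assignments=False,
    )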
42 | """ 43 | if num_conditions == 0: 44 | num_conditions = 1 45 | if num_batches == 0: 46 | num_batches = 1 47 | 48 | # condition column 49 | reps_conditions = math.ceil(num_observations / num_conditions) 50 | conditions = np.squeeze(np.tile([np.arange(num_conditions)], reps_conditions)) 51 | conditions = conditions[range(num_observations)].astype(str) 52 | 53 | # batch column 54 | reps_batches = math.ceil(num_observations / num_batches) 55 | batches = np.repeat(range(num_batches), reps_batches) 56 | batches = batches[range(num_observations)].astype(str) 57 | sample_description = pd.DataFrame({"condition": conditions, "batch": batches}) 58 | 59 | if shuffle_assignments: 60 | sample_description = sample_description.isel( 61 | observations=np.random.permutation(sample_description.observations.values) 62 | ) 63 | 64 | sim_design_loc = patsy.dmatrix("~1+condition+batch", sample_description) 65 | 66 | if intercept_scale: 67 | sim_design_scale = patsy.dmatrix("~1", sample_description) 68 | else: 69 | sim_design_scale = sim_design_loc 70 | 71 | return sim_design_loc, sim_design_scale, sample_description 72 | 73 | 74 | def closedform_glm_mean( 75 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 76 | dmat: Union[np.ndarray, dask.array.core.Array], 77 | constraints: Optional[Union[np.ndarray, dask.array.core.Array]] = None, 78 | size_factors: Optional[np.ndarray] = None, 79 | link_fn: Optional[Callable] = None, 80 | inv_link_fn: Optional[Callable] = None, 81 | ): 82 | r""" 83 | Calculate a closed-form solution for the mean parameters of GLMs. 84 | 85 | :param x: The input data array 86 | :param dmat: some design matrix 87 | :param constraints: tensor (all parameters x dependent parameters) 88 | Tensor that encodes how complete parameter set which includes dependent 89 | parameters arises from indepedent parameters: all = . 90 | This form of constraints is used in vector generalized linear models (VGLMs). 91 | :param size_factors: size factors for X 92 | :param link_fn: linker function for GLM 93 | :param inv_link_fn: inverse linker function for GLM 94 | :return: tuple: (groupwise_means, mu, rmsd) 95 | """ 96 | if size_factors is not None: 97 | x = np.divide(x, size_factors) 98 | 99 | def apply_fun(grouping): 100 | 101 | groupwise_means = np.asarray( 102 | np.vstack([np.mean(densify(x[np.where(grouping == g)[0], :]), axis=0) for g in np.unique(grouping)]) 103 | ) 104 | if link_fn is None: 105 | return groupwise_means 106 | else: 107 | return link_fn(groupwise_means) 108 | 109 | linker_groupwise_means, mu, rmsd, rank, s = groupwise_solve_lm( 110 | dmat=dmat, apply_fun=apply_fun, constraints=constraints 111 | ) 112 | if inv_link_fn is not None: 113 | return inv_link_fn(linker_groupwise_means), mu, rmsd 114 | else: 115 | return linker_groupwise_means, mu, rmsd 116 | 117 | 118 | def closedform_glm_scale( 119 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 120 | design_scale: Union[np.ndarray, dask.array.core.Array], 121 | constraints: Optional[Union[np.ndarray, dask.array.core.Array]] = None, 122 | size_factors: Optional[np.ndarray] = None, 123 | groupwise_means: Optional[np.ndarray] = None, 124 | link_fn: Optional[Callable] = None, 125 | inv_link_fn: Optional[Callable] = None, 126 | compute_scales_fun: Optional[Callable] = None, 127 | ): 128 | r""" 129 | Calculate a closed-form solution for the scale parameters of GLMs. 
130 | 131 | :param x: The sample data 132 | :param design_scale: design matrix for scale 133 | :param constraints: design constraints for the scale model 134 | :param size_factors: size factors for X 135 | :param groupwise_means: optional; if already computed, these can be passed in to avoid recomputation 136 | :param compute_scales_fun: optional function mapping (variance, mean) to the group-wise scale parameter 137 | :param inv_link_fn: inverse link function of the GLM 138 | :param link_fn: link function of the GLM 139 | :return: tuple (groupwise_scales, logphi, rmsd) 140 | """ 141 | if size_factors is not None: 142 | x = x / size_factors 143 | 144 | # to circumvent nonlocal error 145 | provided_groupwise_means = groupwise_means 146 | 147 | def apply_fun(grouping): 148 | # Calculate group-wise means if not supplied. These are required for variance and MME computation. 149 | if provided_groupwise_means is None: 150 | gw_means = np.asarray( 151 | np.vstack([np.mean(densify(x[np.where(grouping == g)[0], :]), axis=0) for g in np.unique(grouping)]) 152 | ) 153 | else: 154 | gw_means = provided_groupwise_means 155 | 156 | # Estimate the group-wise variance via the moment identity Var[x] = E[x^2] - (E[x])^2. 157 | if isinstance(x, scipy.sparse.csr_matrix): 158 | expect_xsq = np.asarray( 159 | np.vstack( 160 | [ 161 | np.asarray(np.mean(densify(x[np.where(grouping == g)[0], :]).power(2), axis=0)) 162 | for g in np.unique(grouping) 163 | ] 164 | ) 165 | ) 166 | else: 167 | expect_xsq = np.vstack( 168 | [np.mean(np.square(densify(x[np.where(grouping == g)[0], :])), axis=0) for g in np.unique(grouping)] 169 | ) 170 | expect_x_sq = np.square(gw_means)  # (E[x])^2 171 | variance = expect_xsq - expect_x_sq 172 | 173 | if compute_scales_fun is not None: 174 | groupwise_scales = compute_scales_fun(variance, gw_means) 175 | else: 176 | groupwise_scales = variance 177 | 178 | if link_fn is not None: 179 | return link_fn(groupwise_scales) 180 | else: 181 | return groupwise_scales 182 | 183 | linker_groupwise_scales, scaleparam, rmsd, rank, _ = groupwise_solve_lm( 184 | dmat=design_scale, apply_fun=apply_fun, constraints=constraints 185 | ) 186 | if inv_link_fn is not None: 187 | return inv_link_fn(linker_groupwise_scales), scaleparam, rmsd 188 | else: 189 | return linker_groupwise_scales, scaleparam, rmsd 190 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 11 | """ 12 | Generalized Linear Model (GLM) with beta-distributed noise, logit link for location and log link for scale.
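Concretely, ``mean = 1 / (1 + exp(-eta_loc))`` and ``samplesize = exp(eta_scale)``, matching the ``inverse_link_loc`` and ``inverse_link_scale`` methods below.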
13 | """ 14 | 15 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 16 | return np.log(1 / (1 / data - 1)) 17 | 18 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 19 | return 1 / (1 + np.exp(-data)) 20 | 21 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 22 | return np.log(data) 23 | 24 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 25 | return np.exp(data) 26 | 27 | @property 28 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 29 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 30 | assert self.size_factors is None, "size factors not allowed" 31 | return eta 32 | 33 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 34 | # Make sure that dimensionality of sliced array is kept: 35 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 36 | j = [j] 37 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 38 | assert self.size_factors is None, "size factors not allowed" 39 | eta = self.np_clip_param(eta, "eta_loc") 40 | return eta 41 | 42 | # Re-parameterizations: 43 | 44 | @property 45 | def mean(self) -> Union[np.ndarray, dask.array.core.Array]: 46 | return self.location 47 | 48 | @property 49 | def samplesize(self) -> Union[np.ndarray, dask.array.core.Array]: 50 | return self.scale 51 | 52 | @property 53 | def p(self) -> Union[np.ndarray, dask.array.core.Array]: 54 | return self.mean * self.samplesize 55 | 56 | @property 57 | def q(self) -> Union[np.ndarray, dask.array.core.Array]: 58 | return (1 - self.mean) * self.samplesize 59 | 60 | # parameter contraints: 61 | 62 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 63 | 64 | zero = np.nextafter(0, np.inf, dtype=dtype) 65 | one = np.nextafter(1, -np.inf, dtype=dtype) 66 | 67 | bounds_min = { 68 | "theta_location": np.log(zero / (1 - zero)) / sf, 69 | "theta_scale": np.log(zero) / sf, 70 | "eta_loc": np.log(zero / (1 - zero)) / sf, 71 | "eta_scale": np.log(zero) / sf, 72 | "mean": np.nextafter(0, np.inf, dtype=dtype), 73 | "samplesize": np.nextafter(0, np.inf, dtype=dtype), 74 | "probs": dtype(0), 75 | "log_probs": np.log(zero), 76 | } 77 | bounds_max = { 78 | "theta_location": np.log(one / (1 - one)) / sf, 79 | "theta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 80 | "eta_loc": np.log(one / (1 - one)) / sf, 81 | "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 82 | "mean": one, 83 | "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 84 | "probs": dtype(1), 85 | "log_probs": dtype(0), 86 | } 87 | 88 | return bounds_min, bounds_max 89 | 90 | # simulator: 91 | 92 | @property 93 | def rand_fn_ave(self) -> Optional[Callable]: 94 | return lambda shape: np.random.uniform(0.2, 0.8, shape) 95 | 96 | @property 97 | def rand_fn(self) -> Optional[Callable]: 98 | return None 99 | 100 | @property 101 | def rand_fn_loc(self) -> Optional[Callable]: 102 | return lambda shape: np.random.uniform(0.05, 0.15, shape) 103 | 104 | @property 105 | def rand_fn_scale(self) -> Optional[Callable]: 106 | return lambda shape: np.random.uniform(0.2, 0.5, shape) 107 | 108 | def generate_data(self): 109 | """ 110 | Sample random data based on beta distribution and parameters. 
111 | """ 112 | return np.random.beta(a=self.p, b=self.q, size=None) 113 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | import scipy.sparse 5 | 6 | from .external import closedform_glm_mean, closedform_glm_scale 7 | 8 | 9 | def closedform_beta_glm_logitmean( 10 | x: Union[np.ndarray, scipy.sparse.csr_matrix], 11 | design_loc: np.ndarray, 12 | constraints_loc, 13 | size_factors=None, 14 | link_fn=lambda x: np.log(1 / (1 / x - 1)), 15 | inv_link_fn=lambda x: 1 / (1 + np.exp(-x)), 16 | ): 17 | r""" 18 | Calculates a closed-form solution for the `mean` parameters of beta GLMs. 19 | 20 | :param x: The sample data 21 | :param design_loc: design matrix for location 22 | :param constraints_loc: tensor (all parameters x dependent parameters) 23 | Tensor that encodes how the complete parameter set, which includes dependent 24 | parameters, arises from the independent parameters: all = constraints @ independent. 25 | This form of constraints is used in vector generalized linear models (VGLMs). 26 | :param size_factors: size factors for X 27 | :return: tuple: (groupwise_means, mean, rmsd) 28 | """ 29 | return closedform_glm_mean( 30 | x=x, 31 | dmat=design_loc, 32 | constraints=constraints_loc, 33 | size_factors=size_factors, 34 | link_fn=link_fn, 35 | inv_link_fn=inv_link_fn, 36 | ) 37 | 38 | 39 | def closedform_beta_glm_logsamplesize( 40 | x: Union[np.ndarray, scipy.sparse.csr_matrix], 41 | design_scale: np.ndarray, 42 | constraints=None, 43 | size_factors=None, 44 | groupwise_means=None, 45 | link_fn=np.log, 46 | invlink_fn=np.exp, 47 | ): 48 | r""" 49 | Calculates a closed-form solution for the log-scale parameters of beta GLMs.
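Based on the method-of-moments identity samplesize = mean * (1 - mean) / variance - 1 (see compute_scales_fun below); e.g. for a Beta(3, 7) variable, mean = 0.3, variance = 21 / 1100, and 0.3 * 0.7 / variance - 1 = 10 = p + q.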
50 | 51 | :param x: The sample data 52 | :param design_scale: design matrix for scale 53 | :param constraints: constraint matrix for the scale model, passed through to closedform_glm_scale 54 | :param size_factors: size factors for X 55 | :param groupwise_means: optional; group-wise means that were already computed can be passed here to avoid recomputing them 56 | :return: tuple (groupwise_scales, logsamplesize, rmsd) 57 | """ 58 | 59 | def compute_scales_fun(variance, mean): 60 | groupwise_scales = mean * (1 - mean) / variance - 1 61 | return groupwise_scales 62 | 63 | return closedform_glm_scale( 64 | x=x, 65 | design_scale=design_scale, 66 | constraints=constraints, 67 | size_factors=size_factors, 68 | groupwise_means=groupwise_means, 69 | link_fn=link_fn, 70 | inv_link_fn=invlink_fn, 71 | compute_scales_fun=compute_scales_fun, 72 | ) 73 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask.array 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 11 | """ 12 | Generalized Linear Model (GLM) with negative binomial noise.
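Both location and scale use the natural log link. Under the (mu, phi) re-parameterization exposed below, the variance of an observation is mu + mu**2 / phi.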
13 | """ 14 | 15 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 16 | return np.log(data) 17 | 18 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 19 | return np.exp(data) 20 | 21 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 22 | return np.log(data) 23 | 24 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 25 | return np.exp(data) 26 | 27 | @property 28 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 29 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 30 | if self.size_factors is not None: 31 | eta += self.size_factors 32 | eta = self.np_clip_param(eta, "eta_loc") 33 | return eta 34 | 35 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 36 | # Make sure that dimensionality of sliced array is kept: 37 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 38 | j = [j] 39 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 40 | if self.size_factors is not None: 41 | eta += self.size_factors 42 | eta = self.np_clip_param(eta, "eta_loc") 43 | return eta 44 | 45 | # Re-parameterizations: 46 | 47 | @property 48 | def mu(self) -> Union[np.ndarray, dask.array.core.Array]: 49 | return self.location 50 | 51 | @property 52 | def phi(self) -> Union[np.ndarray, dask.array.core.Array]: 53 | return self.scale 54 | 55 | # param constraints: 56 | 57 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 58 | 59 | bounds_min = { 60 | "theta_location": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 61 | "theta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 62 | "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 63 | "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 64 | "loc": np.nextafter(0, np.inf, dtype=dtype), 65 | "scale": np.nextafter(0, np.inf, dtype=dtype), 66 | "likelihood": dtype(0), 67 | "ll": np.log(np.nextafter(0, np.inf, dtype=dtype)), 68 | } 69 | bounds_max = { 70 | "theta_location": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 71 | "theta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 72 | "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 73 | "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 74 | "loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 75 | "scale": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 76 | "likelihood": dtype(1), 77 | "ll": dtype(0), 78 | } 79 | return bounds_min, bounds_max 80 | 81 | # simulator: 82 | 83 | @property 84 | def rand_fn_ave(self) -> Optional[Callable]: 85 | return lambda shape: np.random.poisson(500, shape) + 1 86 | 87 | @property 88 | def rand_fn(self) -> Optional[Callable]: 89 | return lambda shape: np.abs(np.random.uniform(0.5, 2, shape)) 90 | 91 | @property 92 | def rand_fn_loc(self) -> Optional[Callable]: 93 | return None 94 | 95 | @property 96 | def rand_fn_scale(self) -> Optional[Callable]: 97 | return None 98 | 99 | def generate_data(self) -> np.ndarray: 100 | """ 101 | Sample random data based on negative binomial distribution and parameters. 
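numpy parameterizes the negative binomial by (n, p); with n = phi and p = 1 - mu / (phi + mu) = phi / (phi + mu), a draw has mean mu and variance mu + mu**2 / phi. A hedged empirical sketch (illustrative values only):

    import numpy as np

    mu, phi = 10.0, 2.0
    x = np.random.negative_binomial(n=phi, p=1 - mu / (phi + mu), size=100_000)
    # x.mean() is close to mu == 10.0 and x.var() is close to mu + mu**2 / phi == 60.0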
102 | """ 103 | return np.random.negative_binomial(n=self.phi, p=1 - self.mu / (self.phi + self.mu), size=None) 104 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Callable, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | import scipy.sparse 7 | 8 | from .external import closedform_glm_mean, closedform_glm_scale 9 | 10 | logger = logging.getLogger("batchglm") 11 | 12 | 13 | def closedform_nb_glm_logmu( 14 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 15 | design_loc: Union[np.ndarray, dask.array.core.Array], 16 | constraints_loc: Union[np.ndarray, dask.array.core.Array], 17 | size_factors: Optional[np.ndarray] = None, 18 | link_fn: Callable = np.log, 19 | inv_link_fn: Callable = np.exp, 20 | ): 21 | r""" 22 | Calculates a closed-form solution for the `mu` parameters of negative-binomial GLMs. 23 | 24 | :param x: The sample data 25 | :param design_loc: design matrix for location 26 | :param constraints_loc: tensor (all parameters x dependent parameters) 27 | Tensor that encodes how complete parameter set which includes dependent 28 | parameters arises from indepedent parameters: all = . 29 | This form of constraints is used in vector generalized linear models (VGLMs). 30 | :param size_factors: size factors for X 31 | :return: tuple: (groupwise_means, mu, rmsd) 32 | """ 33 | return closedform_glm_mean( 34 | x=x, 35 | dmat=design_loc, 36 | constraints=constraints_loc, 37 | size_factors=size_factors, 38 | link_fn=link_fn, 39 | inv_link_fn=inv_link_fn, 40 | ) 41 | 42 | 43 | def closedform_nb_glm_logphi( 44 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 45 | design_scale: Union[np.ndarray, dask.array.core.Array], 46 | constraints: Optional[Union[np.ndarray, dask.array.core.Array]] = None, 47 | size_factors: Optional[np.ndarray] = None, 48 | groupwise_means: Optional[np.ndarray] = None, 49 | link_fn: Callable = np.log, 50 | invlink_fn: Callable = np.exp, 51 | ): 52 | r""" 53 | Calculates a closed-form solution for the log-scale parameters of negative-binomial GLMs. 54 | Based on the Method-of-Moments estimator. 55 | 56 | :param x: The sample data 57 | :param design_scale: design matrix for scale 58 | :param constraints: some design constraints 59 | :param size_factors: size factors for X 60 | :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation 61 | :return: tuple (groupwise_scales, logphi, rmsd) 62 | """ 63 | 64 | def compute_scales_fun(variance, mean): 65 | denominator = np.fmax(variance - mean, np.sqrt(np.nextafter(0, 1, dtype=variance.dtype))) 66 | groupwise_scales = np.square(mean) / denominator 67 | return groupwise_scales 68 | 69 | return closedform_glm_scale( 70 | x=x, 71 | design_scale=design_scale, 72 | constraints=constraints, 73 | size_factors=size_factors, 74 | groupwise_means=groupwise_means, 75 | link_fn=link_fn, 76 | inv_link_fn=invlink_fn, 77 | compute_scales_fun=compute_scales_fun, 78 | ) 79 | 80 | 81 | def init_par(model, init_location: str, init_scale: str) -> Tuple[np.ndarray, np.ndarray, bool, bool]: 82 | r""" 83 | standard: 84 | Only initialise intercept and keep other coefficients as zero. 
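Concretely, the 'standard' branch below sets theta_location[0, :] = log(mean(x, axis=0)) and leaves the remaining rows at zero.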
85 | 86 | closed-form: 87 | Initialize with Maximum Likelihood / Maximum of Momentum estimators 88 | 89 | Idea: 90 | $$ 91 | \theta &= f(x) \\ 92 | \Rightarrow f^{-1}(\theta) &= x \\ 93 | &= (D \cdot D^{+}) \cdot x \\ 94 | &= D \cdot (D^{+} \cdot x) \\ 95 | &= D \cdot x' = f^{-1}(\theta) 96 | $$ 97 | """ 98 | train_loc = False 99 | 100 | def auto_loc(dmat: Union[np.ndarray, dask.array.core.Array]) -> str: 101 | """ 102 | Checks if dmat is one-hot encoded and returns 'closed_form' if so, else 'standard' 103 | 104 | :param dmat The design matrix to check. 105 | """ 106 | unique_params = np.unique(dmat) 107 | if isinstance(unique_params, dask.array.core.Array): 108 | unique_params = unique_params.compute() 109 | if len(unique_params) == 2 and unique_params[0] == 0.0 and unique_params[1] == 1.0: 110 | return "closed_form" 111 | logger.warning( 112 | ( 113 | "Cannot use 'closed_form' init for loc model: " 114 | "design_loc is not one-hot encoded. Falling back to standard initialization." 115 | ) 116 | ) 117 | return "standard" 118 | 119 | groupwise_means = None 120 | 121 | init_location_str = init_location.lower() 122 | # Chose option if auto was chosen 123 | if init_location_str == "auto": 124 | 125 | init_location_str = auto_loc(model.design_loc) 126 | 127 | if init_location_str == "closed_form": 128 | groupwise_means, init_theta_location, rmsd_a = closedform_nb_glm_logmu( 129 | x=model.x, 130 | design_loc=model.design_loc, 131 | constraints_loc=model.constraints_loc, 132 | size_factors=model.size_factors, 133 | link_fn=lambda mu: np.log(mu + np.nextafter(0, 1, dtype=mu.dtype)), 134 | ) 135 | # train mu, if the closed-form solution is inaccurate 136 | train_loc = not (np.all(np.abs(rmsd_a) < 1e-20) or rmsd_a.size == 0) 137 | if model.size_factors is not None: 138 | if np.any(model.size_factors != 1): 139 | train_loc = True 140 | 141 | elif init_location_str == "standard": 142 | overall_means = np.mean(model.x, axis=0) # directly calculate the mean 143 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 144 | init_theta_location[0, :] = np.log(overall_means) 145 | train_loc = True 146 | elif init_location_str == "all_zero": 147 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 148 | train_loc = True 149 | else: 150 | raise ValueError("init_location string %s not recognized" % init_location) 151 | 152 | init_scale_str = init_scale.lower() 153 | if init_scale_str == "auto": 154 | init_scale_str = "standard" 155 | 156 | if init_scale_str == "standard": 157 | groupwise_scales, init_scale_intercept, rmsd_b = closedform_nb_glm_logphi( 158 | x=model.x, 159 | design_scale=model.design_scale[:, [0]], 160 | constraints=model.constraints_scale[[0], :][:, [0]], 161 | size_factors=model.size_factors, 162 | groupwise_means=None, 163 | link_fn=lambda r: np.log(r + np.nextafter(0, 1, dtype=r.dtype)), 164 | ) 165 | init_theta_scale = np.zeros([model.num_scale_params, model.num_features]) 166 | init_theta_scale[0, :] = init_scale_intercept 167 | elif init_scale_str == "closed_form": 168 | if not np.array_equal(model.design_loc, model.design_scale): 169 | raise ValueError("Cannot use 'closed_form' init for scale model: design_scale != design_loc.") 170 | if init_location_str is not None and init_location_str != init_scale_str: 171 | raise ValueError( 172 | "Cannot use 'closed_form' init for scale model: init_location != 'closed_form' which is required." 
173 | ) 174 | 175 | groupwise_scales, init_theta_scale, rmsd_b = closedform_nb_glm_logphi( 176 | x=model.x, 177 | design_scale=model.design_scale, 178 | constraints=model.constraints_scale, 179 | size_factors=model.size_factors, 180 | groupwise_means=groupwise_means, 181 | link_fn=lambda r: np.log(r), 182 | ) 183 | elif init_scale_str == "all_zero": 184 | init_theta_scale = np.zeros([model.num_scale_params, model.x.shape[1]]) 185 | else: 186 | raise ValueError("init_scale string %s not recognized" % init_scale_str) 187 | 188 | return init_theta_location, init_theta_scale, train_loc, True 189 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 11 | 12 | """Generalized Linear Model (GLM) with normal noise.""" 13 | 14 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 15 | return data 16 | 17 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 18 | return data 19 | 20 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 21 | return np.log(data) 22 | 23 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 24 | return np.exp(data) 25 | 26 | @property 27 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 28 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 29 | if self.size_factors is not None: 30 | eta *= self.size_factors 31 | return eta 32 | 33 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 34 | # Make sure that dimensionality of sliced array is kept: 35 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 36 | j = [j] 37 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 38 | if self.size_factors is not None: 39 | eta *= self.size_factors 40 | eta = self.np_clip_param(eta, "eta_loc") 41 | return eta 42 | 43 | # Re-parameterizations: 44 | 45 | @property 46 | def mean(self) -> Union[np.ndarray, dask.array.core.Array]: 47 | return self.location 48 | 49 | @property 50 | def sd(self) -> Union[np.ndarray, dask.array.core.Array]: 51 | return self.scale 52 | 53 | # param constraints: 54 | 55 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 56 | 57 | bounds_min = { 58 | "theta_location": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, 59 | "theta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 60 | "eta_loc": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, 61 | "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 62 | "mean": 
np.nextafter(-dmax, np.inf, dtype=dtype) / sf, 63 | "sd": np.nextafter(0, np.inf, dtype=dtype), 64 | "probs": dtype(0), 65 | "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), 66 | } 67 | bounds_max = { 68 | "theta_location": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 69 | "theta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 70 | "eta_loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 71 | "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 72 | "mean": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 73 | "sd": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 74 | "probs": dtype(1), 75 | "log_probs": dtype(0), 76 | } 77 | return bounds_min, bounds_max 78 | 79 | # simulator: 80 | 81 | @property 82 | def rand_fn_ave(self) -> Optional[Callable]: 83 | return lambda shape: np.random.uniform(10, 1000, shape) 84 | 85 | @property 86 | def rand_fn(self) -> Optional[Callable]: 87 | return None 88 | 89 | @property 90 | def rand_fn_loc(self) -> Optional[Callable]: 91 | return lambda shape: np.random.uniform(50, 100, shape) 92 | 93 | @property 94 | def rand_fn_scale(self) -> Optional[Callable]: 95 | return lambda shape: np.random.uniform(1.5, 10, shape) 96 | 97 | def generate_data(self): 98 | """ 99 | Sample random data based on normal distribution and parameters. 100 | """ 101 | return np.random.normal(loc=self.mean, scale=self.sd, size=None) 102 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | import scipy.sparse 7 | 8 | from .external import closedform_glm_scale 9 | 10 | logger = logging.getLogger("batchglm") 11 | 12 | 13 | def closedform_norm_glm_logsd( 14 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 15 | design_scale: Union[np.ndarray, dask.array.core.Array], 16 | constraints=None, 17 | size_factors=None, 18 | groupwise_means=None, 19 | link_fn=np.log, 20 | ): 21 | r""" 22 | Calculates a closed-form solution for the log-scale parameters of normal GLMs. 23 | 24 | :param x: The sample data 25 | :param design_scale: design matrix for scale 26 | :param constraints: some design constraints 27 | :param size_factors: size factors for X 28 | :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation 29 | :return: tuple (groupwise_scales, logsd, rmsd) 30 | """ 31 | 32 | def compute_scales_fun(variance, mean): 33 | groupwise_scales = np.sqrt(variance) 34 | return groupwise_scales 35 | 36 | return closedform_glm_scale( 37 | x=x, 38 | design_scale=design_scale, 39 | constraints=constraints, 40 | size_factors=size_factors, 41 | groupwise_means=groupwise_means, 42 | link_fn=link_fn, 43 | compute_scales_fun=compute_scales_fun, 44 | ) 45 | 46 | 47 | def init_par(model, init_location: str, init_scale: str) -> Tuple[np.ndarray, np.ndarray, bool, bool]: 48 | r""" 49 | standard: 50 | Only initialise intercept and keep other coefficients as zero. 
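For the identity-linked normal location model, the 'standard' branch below sets the intercept to the observed feature means directly (no log transform).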
51 | 52 | closed-form: 53 | Initialize with Maximum Likelihood / Method of Moments estimators 54 | 55 | Idea: 56 | $$ 57 | \theta &= f(x) \\ 58 | \Rightarrow f^{-1}(\theta) &= x \\ 59 | &= (D \cdot D^{+}) \cdot x \\ 60 | &= D \cdot (D^{+} \cdot x) \\ 61 | &= D \cdot x' = f^{-1}(\theta) 62 | $$ 63 | """ 64 | groupwise_means = None 65 | 66 | init_location_str = init_location.lower() 67 | # Choose the concrete option if "auto" was chosen 68 | auto_or_closed_form = init_location_str == "auto" or init_location_str == "closed_form" 69 | if auto_or_closed_form or init_location_str == "all_zero": 70 | if auto_or_closed_form: 71 | logger.warning( 72 | ( 73 | "There is no need for closed-form location model initialization " 74 | "because it is already closed form - falling back to zeros." 75 | ) 76 | ) 77 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 78 | elif init_location_str == "standard": 79 | overall_means = np.mean(model.x, axis=0) # directly calculate the mean 80 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 81 | init_theta_location[0, :] = overall_means # identity linked. 82 | else: 83 | raise ValueError("init_location string %s not recognized" % init_location) 84 | 85 | init_scale_str = init_scale.lower() 86 | if init_scale_str == "auto": 87 | init_scale_str = "standard" 88 | 89 | if init_scale_str == "standard": 90 | groupwise_scales, init_scale_intercept, rmsd_b = closedform_norm_glm_logsd( 91 | x=model.x, 92 | design_scale=model.design_scale[:, [0]], 93 | constraints=model.constraints_scale[[0], :][:, [0]], 94 | size_factors=model.size_factors, 95 | groupwise_means=None, 96 | link_fn=lambda r: np.log(r + np.nextafter(0, 1, dtype=r.dtype)), 97 | ) 98 | init_theta_scale = np.zeros([model.num_scale_params, model.num_features]) 99 | init_theta_scale[0, :] = init_scale_intercept 100 | elif init_scale_str == "closed_form": 101 | groupwise_scales, init_theta_scale, rmsd_b = closedform_norm_glm_logsd( 102 | x=model.x, 103 | design_scale=model.design_scale, 104 | constraints=model.constraints_scale, 105 | size_factors=model.size_factors, 106 | groupwise_means=groupwise_means, 107 | ) 108 | elif init_scale_str == "all_zero": 109 | init_theta_scale = np.zeros([model.num_scale_params, model.x.shape[1]]) 110 | else: 111 | raise ValueError("init_scale string %s not recognized" % init_scale_str) 112 | 113 | return init_theta_location, init_theta_scale, True, True 114 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask.array 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 
11 | """ 12 | Generalized Linear Model (GLM) with Poisson noise. 13 | """ 14 | 15 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 16 | return np.log(data) 17 | 18 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 19 | return np.exp(data) 20 | 21 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 22 | return np.log(data) 23 | 24 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 25 | return np.exp(data) 26 | 27 | @property 28 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 29 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 30 | if self.size_factors is not None: 31 | eta += self.size_factors 32 | eta = self.np_clip_param(eta, "eta_loc") 33 | return eta 34 | 35 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 36 | # Make sure that dimensionality of sliced array is kept: 37 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 38 | j = [j] 39 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 40 | if self.size_factors is not None: 41 | eta += self.size_factors 42 | eta = self.np_clip_param(eta, "eta_loc") 43 | return eta 44 | 45 | # Re-parameterizations: 46 | 47 | @property 48 | def lam(self) -> Union[np.ndarray, dask.array.core.Array]: 49 | return self.location 50 | 51 | # param constraints: 52 | 53 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 54 | 55 | bounds_min = { 56 | "theta_location": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 57 | "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 58 | "loc": np.nextafter(0, np.inf, dtype=dtype), 59 | "scale": np.nextafter(0, np.inf, dtype=dtype), 60 | "likelihood": dtype(0), 61 | "ll": np.log(np.nextafter(0, np.inf, dtype=dtype)), 62 | # Not used and should be removed: https://github.com/theislab/batchglm/issues/148 63 | "theta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 64 | "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 65 | } 66 | bounds_max = { 67 | "theta_location": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 68 | "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 69 | "loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 70 | "scale": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 71 | "likelihood": dtype(1), 72 | "ll": dtype(10000), # poisson models can have large log likelhoods initially 73 | # Not used and should be removed: https://github.com/theislab/batchglm/issues/148 74 | "theta_scale": np.log(dmax) / sf, 75 | "eta_scale": np.log(dmax) / sf, 76 | } 77 | return bounds_min, bounds_max 78 | 79 | # simulator: 80 | 81 | @property 82 | def rand_fn_ave(self) -> Optional[Callable]: 83 | return lambda shape: np.random.poisson(500, shape) + 1 84 | 85 | @property 86 | def rand_fn(self) -> Optional[Callable]: 87 | return lambda shape: np.abs(np.random.uniform(0.5, 2, shape)) 88 | 89 | @property 90 | def rand_fn_loc(self) -> Optional[Callable]: 91 | return None 92 | 93 | @property 94 | def rand_fn_scale(self) -> Optional[Callable]: 95 | return None 96 | 97 | def generate_data(self) -> np.ndarray: 98 | """ 99 | Sample random data based on poisson distribution and parameters. 
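lam is the inverse-log-linked location, i.e. lam = exp(eta_loc). A hedged sketch with made-up values (not part of the API):

    import numpy as np

    lam = np.exp(np.array([[0.0, 1.0], [2.0, 3.0]]))  # inverse link, as in self.lam
    x = np.random.poisson(lam=lam)  # elementwise draws with E[x] = Var[x] = lam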
100 | """ 101 | # see https://github.com/astronomyk/SimCADO/issues/59 for why we cast lam 102 | return np.random.poisson(lam=self.lam) 103 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Callable, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | import scipy.sparse 7 | 8 | from .external import closedform_glm_mean 9 | 10 | logger = logging.getLogger("batchglm") 11 | 12 | 13 | def closedform_poisson_glm_loglam( 14 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 15 | design_loc: Union[np.ndarray, dask.array.core.Array], 16 | constraints_loc: Union[np.ndarray, dask.array.core.Array], 17 | size_factors: Optional[np.ndarray] = None, 18 | link_fn: Callable = np.log, 19 | inv_link_fn: Callable = np.exp, 20 | ): 21 | r""" 22 | Calculates a closed-form solution for the `lam` parameters of poisson GLMs. 23 | 24 | :param x: The sample data 25 | :param design_loc: design matrix for location 26 | :param constraints_loc: tensor (all parameters x dependent parameters) 27 | Tensor that encodes how complete parameter set which includes dependent 28 | parameters arises from indepedent parameters: all = . 29 | This form of constraints is used in vector generalized linear models (VGLMs). 30 | :param size_factors: size factors for X 31 | :return: tuple: (groupwise_means, mu, rmsd) 32 | """ 33 | return closedform_glm_mean( 34 | x=x, 35 | dmat=design_loc, 36 | constraints=constraints_loc, 37 | size_factors=size_factors, 38 | link_fn=link_fn, 39 | inv_link_fn=inv_link_fn, 40 | ) 41 | 42 | 43 | def init_par(model, init_location: str) -> Tuple[np.ndarray, np.ndarray, bool, bool]: 44 | r""" 45 | standard: 46 | Only initialise intercept and keep other coefficients as zero. 47 | 48 | closed-form: 49 | Initialize with Maximum Likelihood / Maximum of Momentum estimators 50 | 51 | Idea: 52 | $$ 53 | \theta &= f(x) \\ 54 | \Rightarrow f^{-1}(\theta) &= x \\ 55 | &= (D \cdot D^{+}) \cdot x \\ 56 | &= D \cdot (D^{+} \cdot x) \\ 57 | &= D \cdot x' = f^{-1}(\theta) 58 | $$ 59 | """ 60 | train_loc = False 61 | 62 | def auto_loc(dmat: Union[np.ndarray, dask.array.core.Array]) -> str: 63 | """ 64 | Checks if dmat is one-hot encoded and returns 'closed_form' if so, else 'standard' 65 | 66 | :param dmat The design matrix to check. 67 | """ 68 | unique_params = np.unique(dmat) 69 | if isinstance(unique_params, dask.array.core.Array): 70 | unique_params = unique_params.compute() 71 | if len(unique_params) == 2 and unique_params[0] == 0.0 and unique_params[1] == 1.0: 72 | return "closed_form" 73 | logger.warning( 74 | ( 75 | "Cannot use 'closed_form' init for loc model: " 76 | "design_loc is not one-hot encoded. Falling back to standard initialization." 
77 | ) 78 | ) 79 | return "standard" 80 | 81 | groupwise_means = None 82 | 83 | init_location_str = init_location.lower() 84 | # Chose option if auto was chosen 85 | if init_location_str == "auto": 86 | 87 | init_location_str = auto_loc(model.design_loc) 88 | 89 | if init_location_str == "closed_form": 90 | groupwise_means, init_theta_location, rmsd_a = closedform_poisson_glm_loglam( 91 | x=model.x, 92 | design_loc=model.design_loc, 93 | constraints_loc=model.constraints_loc, 94 | size_factors=model.size_factors, 95 | link_fn=lambda lam: np.log(lam + np.nextafter(0, 1, dtype=lam.dtype)), 96 | ) 97 | # train mu, if the closed-form solution is inaccurate 98 | train_loc = not (np.all(np.abs(rmsd_a) < 1e-20) or rmsd_a.size == 0) 99 | if model.size_factors is not None: 100 | if np.any(model.size_factors != 1): 101 | train_loc = True 102 | 103 | elif init_location_str == "standard": 104 | overall_means = np.mean(model.x, axis=0) # directly calculate the mean 105 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 106 | init_theta_location[0, :] = np.log(overall_means) 107 | train_loc = True 108 | elif init_location_str == "all_zero": 109 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 110 | train_loc = True 111 | else: 112 | raise ValueError("init_location string %s not recognized" % init_location) 113 | 114 | # Scale is not used so just return init_theta_location for what would be init_theta_scale 115 | return init_theta_location, init_theta_location, train_loc, True 116 | -------------------------------------------------------------------------------- /batchglm/pkg_constants.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | 4 | ACCURACY_MARGIN_RELATIVE_TO_LIMIT = float(os.environ.get("BATCHGLM_ACCURACY_MARGIN", 2.5)) 5 | FIM_MODE = str(os.environ.get("FIM_MODE", "analytic")) 6 | HESSIAN_MODE = str(os.environ.get("HESSIAN_MODE", "analytic")) 7 | JACOBIAN_MODE = str(os.environ.get("JACOBIAN_MODE", "analytic")) 8 | CHOLESKY_LSTSQS = False 9 | CHOLESKY_LSTSQS_BATCHED = False 10 | EVAL_ON_BATCHED = False 11 | 12 | # Trust region hyper parameters: 13 | TRUST_REGION_RADIUS_INIT = 100.0 14 | TRUST_REGION_ETA0 = 0.0 15 | TRUST_REGION_ETA1 = 0.25 16 | TRUST_REGION_ETA2 = 0.25 17 | TRUST_REGION_T1 = 0.5 # Fast collapse to avoid trailing. 18 | TRUST_REGION_T2 = 1.5 # Allow expansion if not shrinking. 19 | TRUST_REGION_UPPER_BOUND = 1e5 20 | 21 | TRUST_REGIONT_T1_IRLS_GD_TR_SCALE = 1 22 | 23 | # Convergence hyper-parameters: 24 | LLTOL_BY_FEATURE = 1e-10 25 | XTOL_BY_FEATURE_LOC = 1e-8 26 | XTOL_BY_FEATURE_SCALE = 1e-6 27 | GTOL_BY_FEATURE_LOC = 1e-8 28 | GTOL_BY_FEATURE_SCALE = 1e-8 29 | -------------------------------------------------------------------------------- /batchglm/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/batchglm/b893fd0ce020669ff38583e4ec135b10926093ae/batchglm/py.typed -------------------------------------------------------------------------------- /batchglm/train/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import numpy 2 | -------------------------------------------------------------------------------- /batchglm/train/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import BaseEstimatorGlm 2 | from .model_container import BaseModelContainer 3 | -------------------------------------------------------------------------------- /batchglm/train/base/estimator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class BaseEstimatorGlm(metaclass=abc.ABCMeta): 5 | @abc.abstractmethod 6 | def model_container(self): 7 | pass 8 | 9 | @abc.abstractmethod 10 | def initialize(self): 11 | pass 12 | 13 | @abc.abstractmethod 14 | def train_sequence(self): 15 | pass 16 | 17 | @abc.abstractmethod 18 | def finalize(self): 19 | pass 20 | -------------------------------------------------------------------------------- /batchglm/train/base/model_container.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from ...models.base_glm import ModelGLM 4 | 5 | 6 | class BaseModelContainer(metaclass=abc.ABCMeta): 7 | @abc.abstractmethod 8 | def error_codes(self): 9 | pass 10 | 11 | @abc.abstractmethod 12 | def niter(self): 13 | pass 14 | 15 | @abc.abstractmethod 16 | def ll(self): 17 | pass 18 | 19 | @abc.abstractmethod 20 | def jac(self): 21 | pass 22 | 23 | @abc.abstractmethod 24 | def hessian(self): 25 | pass 26 | 27 | @abc.abstractmethod 28 | def fisher_inv(self): 29 | pass 30 | 31 | @property 32 | @abc.abstractmethod 33 | def theta_location(self): 34 | pass 35 | 36 | @property 37 | @abc.abstractmethod 38 | def model(self) -> ModelGLM: 39 | pass 40 | 41 | def theta_location_constrained(self): 42 | pass 43 | 44 | def theta_scale_constrained(self): 45 | pass 46 | -------------------------------------------------------------------------------- /batchglm/train/numpy/__init__.py: -------------------------------------------------------------------------------- 1 | from . import glm_nb, glm_poisson 2 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import EstimatorGlm 2 | from .model_container import NumpyModelContainer 3 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/external.py: -------------------------------------------------------------------------------- 1 | from batchglm import pkg_constants 2 | from batchglm.models.base_glm import InputDataGLM, ModelGLM 3 | from batchglm.train.base import BaseEstimatorGlm, BaseModelContainer 4 | from batchglm.utils.data import dask_compute 5 | from batchglm.utils.linalg import groupwise_solve_lm 6 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/model_container.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union 3 | 4 | import dask.array 5 | import numpy as np 6 | 7 | from .external import BaseModelContainer, ModelGLM, dask_compute 8 | 9 | 10 | class NumpyModelContainer(BaseModelContainer): 11 | """ 12 | Build variables to be optimized. 
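params stacks the location block on top of the scale block along axis 0; npar_location marks the split, which the getters below use to slice params[0 : npar_location] (location) and params[npar_location :] (scale).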
13 | 14 | Attributes 15 | ---------- 16 | theta_location : np.ndarray 17 | Location model parameters 18 | theta_scale : np.ndarray 19 | Scale model parameters 20 | converged : np.ndarray 21 | Whether or not each feature's parameters have converged 22 | params : Union[np.ndarray, dask.array.core.Array] 23 | Model parameters 26 | idx_train_loc : np.ndarray 27 | Training indices for location model 28 | idx_train_scale : np.ndarray 29 | Training indices for scale model 30 | npar_location : int 31 | number of location parameters 32 | dtype : str 33 | data type to be used 34 | """ 35 | 36 | params: Union[np.ndarray, dask.array.core.Array] 37 | converged: np.ndarray 38 | npar_location: int 39 | dtype: str 40 | 41 | def __init__( 42 | self, 43 | model: ModelGLM, 44 | init_theta_location: Union[np.ndarray, dask.array.core.Array], 45 | init_theta_scale: Union[np.ndarray, dask.array.core.Array], 46 | chunk_size_genes: int, 47 | dtype: str, 48 | ): 49 | """ 50 | :param init_theta_location: 51 | Initialisation for all parameters of the mean model. (mean model size x features) 52 | :param init_theta_scale: 53 | Initialisation for all parameters of the dispersion model. (dispersion model size x features) 54 | :param chunk_size_genes: 55 | chunk size for dask 56 | :param dtype: 57 | Numerical precision to use. 58 | """ 59 | 60 | self._model = model 61 | init_theta_location_clipped = model.np_clip_param( 62 | np.asarray(init_theta_location, dtype=dtype), "theta_location" 63 | ) 64 | init_theta_scale_clipped = model.np_clip_param(np.asarray(init_theta_scale, dtype=dtype), "theta_scale") 65 | self.params = dask.array.from_array( 66 | np.concatenate( 67 | [ 68 | init_theta_location_clipped, 69 | init_theta_scale_clipped, 70 | ], 71 | axis=0, 72 | ), 73 | chunks=(1000, chunk_size_genes), 74 | ) 75 | self.npar_location = init_theta_location_clipped.shape[0] 76 | 77 | # Properties to follow gene-wise convergence. 78 | self.converged = np.repeat(a=False, repeats=self.params.shape[1]) # Initialise to non-converged. 79 | 80 | self.dtype = dtype 81 | self.idx_train_loc = np.arange(0, init_theta_location.shape[0]) 82 | self.idx_train_scale = np.arange( 83 | init_theta_location.shape[0], init_theta_location.shape[0] + init_theta_scale.shape[0] 84 | ) 85 | 86 | # Override the model's location and scale parameter getters so that they reference this container's params. 87 | self._model._theta_location_getter = self._theta_location_getter 88 | self._model._theta_scale_getter = self._theta_scale_getter 89 | 90 | # Is this actually used in diffxpy? Why? 91 | @property 92 | def niter(self): 93 | return None 94 | 95 | # Is this actually used in diffxpy? Why? 
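# niter and error_codes are inert placeholders for downstream callers (e.g. diffxpy);
# this container tracks neither an iteration count nor per-feature error codes.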
96 | @property 97 | def error_codes(self): 98 | return np.array([]) 99 | 100 | @property 101 | def model(self) -> ModelGLM: 102 | return self._model 103 | 104 | @property 105 | def fisher_inv(self) -> np.ndarray: 106 | return self._fisher_inv 107 | 108 | def _theta_location_getter(self) -> dask.array.core.Array: 109 | theta_location = self.params[0 : self.npar_location] 110 | return self.np_clip_param(theta_location, "theta_location") 111 | 112 | def _theta_scale_getter(self) -> dask.array.core.Array: 113 | theta_scale = self.params[self.npar_location :] 114 | return self.np_clip_param(theta_scale, "theta_scale") 115 | 116 | def __getattr__(self, attr: str): 117 | if attr.startswith("__") and attr.endswith("__"): 118 | raise AttributeError() 119 | return self.model.__getattribute__(attr) 120 | 121 | @property 122 | def idx_not_converged(self) -> np.ndarray: 123 | """Find which features are not converged""" 124 | return np.where(np.logical_not(self.converged))[0] 125 | 126 | @property 127 | def theta_location(self) -> dask.array.core.Array: 128 | """Location parameters""" 129 | return self._theta_location_getter() 130 | 131 | @theta_location.setter 132 | def theta_location(self, value: Union[np.ndarray, dask.array.core.Array]): 133 | # Threshold new entry: 134 | value = self.np_clip_param(value, "theta_location") 135 | # Write either new dask array or into numpy array: 136 | if isinstance(self.params, dask.array.core.Array): 137 | temp = self.params.compute() 138 | temp[0 : self.npar_location] = value 139 | self.params = dask.array.from_array(temp, chunks=self.params.chunksize) 140 | else: 141 | self.params[0 : self.npar_location] = value 142 | 143 | @property 144 | def theta_scale(self) -> dask.array.core.Array: 145 | """Scale parameters""" 146 | return self._theta_scale_getter() 147 | 148 | @theta_scale.setter 149 | def theta_scale(self, value: Union[np.ndarray, dask.array.core.Array]): 150 | # Threshold new entry: 151 | value = self.np_clip_param(value, "theta_scale") 152 | # Write either new dask array or into numpy array: 153 | if isinstance(self.params, dask.array.core.Array): 154 | temp = self.params.compute() 155 | temp[self.npar_location :] = value 156 | self.params = dask.array.from_array(temp, chunks=self.params.chunksize) 157 | else: 158 | self.params[self.npar_location :] = value 159 | 160 | @property 161 | def theta_location_constrained(self) -> Union[np.ndarray, dask.array.core.Array]: 162 | """dot product of location constraints with location parameter giving new constrained parameters""" 163 | return np.dot(self.constraints_loc, self.theta_location) 164 | 165 | @property 166 | def theta_scale_constrained(self) -> Union[np.ndarray, dask.array.core.Array]: 167 | """dot product of scale constraints with scale parameter giving new constrained parameters""" 168 | return np.dot(self.constraints_scale, self.theta_scale) 169 | 170 | def theta_scale_j(self, j: Union[int, np.ndarray]) -> dask.array.core.Array: 171 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 172 | j = np.full(1, j) 173 | return self.np_clip_param(self.params[self.npar_location :, j], "theta_scale") 174 | 175 | def theta_scale_j_setter(self, value: Union[np.ndarray, dask.array.core.Array], j: Union[int, np.ndarray]): 176 | """Setter for a specific theta_scale value.""" 177 | # Threshold new entry: 178 | value = self.np_clip_param(value, "theta_scale") 179 | # Write either new dask array or into numpy array: 180 | if isinstance(self.params, dask.array.core.Array): 181 | temp = 
self.params.compute() 182 | temp[self.npar_location :, j] = value 183 | self.params = dask.array.from_array(temp, chunks=self.params.chunksize) 184 | else: 185 | self.params[self.npar_location :, j] = value 186 | 187 | # jacobians 188 | 189 | @abc.abstractmethod 190 | def jac_weight(self) -> Union[np.ndarray, dask.array.core.Array]: 191 | pass 192 | 193 | @abc.abstractmethod 194 | def jac_weight_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 195 | pass 196 | 197 | @property 198 | def jac(self) -> Union[np.ndarray, dask.array.core.Array]: 199 | return np.concatenate([self.jac_location, self.jac_scale], axis=-1) 200 | 201 | @property 202 | def jac_location(self) -> Union[np.ndarray, dask.array.core.Array]: 203 | """ 204 | Location jacobian. 205 | :return: (features x inferred param) 206 | """ 207 | w = self.fim_weight_location_location # (observations x features) 208 | ybar = self.ybar # (observations x features) 209 | xh = self.xh_loc # (observations x inferred param) 210 | inner = np.einsum("ob,of->fob", xh, w) 211 | return np.einsum("fob,of->fb", inner, ybar) 212 | 213 | def jac_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 214 | """ 215 | Location jacobian indexed by j, the dependent variable of interest. 216 | :return: (features x inferred param) 217 | """ 218 | # Make sure that dimensionality of sliced array is kept: 219 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 220 | j = np.full(1, j) 221 | w = self.fim_weight_location_location_j(j=j) # (observations x features) 222 | ybar = self.ybar_j(j=j) # (observations x features) 223 | xh = self.xh_loc # (observations x inferred param) 224 | return np.einsum("fob,of->fb", np.einsum("ob,of->fob", xh, w), ybar) 225 | 226 | @property 227 | def jac_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 228 | """ 229 | 230 | :return: (features x inferred param) 231 | """ 232 | w = self.jac_weight_scale # (observations x features) 233 | xh = self.xh_scale # (observations x inferred param) 234 | return w.transpose() @ xh 235 | 236 | @dask_compute 237 | def jac_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 238 | """ 239 | 240 | :return: (features x inferred param) 241 | """ 242 | # Make sure that dimensionality of sliced array is kept: 243 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 244 | j = np.full(1, j) 245 | w = self.jac_weight_scale_j(j=j) # (observations x features) 246 | xh = self.xh_scale # (observations x inferred param) 247 | return w.transpose() @ xh 248 | 249 | @abc.abstractmethod 250 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 251 | pass 252 | 253 | # hessians 254 | 255 | @property 256 | @abc.abstractmethod 257 | def hessian_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 258 | pass 259 | 260 | @property 261 | def hessian_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 262 | """ 263 | :return: (features x inferred param x inferred param) 264 | """ 265 | w = self.hessian_weight_location_location 266 | xh = self.xh_loc 267 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh) 268 | 269 | @property 270 | @abc.abstractmethod 271 | def hessian_weight_location_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 272 | pass 273 | 274 | @property 275 | def hessian_location_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 276 | 
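# Contraction pattern used throughout this class: the inner einsum "ob,of->fob"
# scales the design columns by the per-feature weights, and the outer einsum
# "fob,oc->fbc" sums over observations, i.e. per feature f it computes
# xh_loc.T @ diag(w[:, f]) @ xh_scale.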
""" 277 | 278 | :return: (features x inferred param x inferred param) 279 | """ 280 | w = self.hessian_weight_location_scale 281 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", self.xh_loc, w), self.xh_scale) 282 | 283 | @property 284 | @abc.abstractmethod 285 | def hessian_weight_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 286 | pass 287 | 288 | @property 289 | def hessian_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 290 | """ 291 | 292 | :return: (features x inferred param x inferred param) 293 | """ 294 | w = self.hessian_weight_scale_scale 295 | xh = self.xh_scale 296 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh) 297 | 298 | @property 299 | def hessian(self) -> Union[np.ndarray, dask.array.core.Array]: 300 | """ 301 | 302 | :return: (features x inferred param x inferred param) 303 | """ 304 | h_aa = self.hessian_location_location 305 | h_bb = self.hessian_scale_scale 306 | h_ab = self.hessian_location_scale 307 | h_ba = np.transpose(h_ab, axes=[0, 2, 1]) 308 | return np.concatenate([np.concatenate([h_aa, h_ab], axis=2), np.concatenate([h_ba, h_bb], axis=2)], axis=1) 309 | 310 | # fim 311 | 312 | @abc.abstractmethod 313 | def fim_weight_location_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 314 | pass 315 | 316 | @property 317 | def fim_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 318 | """ 319 | Location-location coefficient block of FIM 320 | 321 | :return: (features x inferred param x inferred param) 322 | """ 323 | w = self.fim_weight_location_location # (observations x features) 324 | # constraints: (observed param x inferred param) 325 | # design: (observations x observed param) 326 | # w: (observations x features) 327 | # fim: (features x inferred param x inferred param) 328 | xh = self.xh_loc 329 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh) 330 | 331 | @property 332 | @abc.abstractmethod 333 | def fim_location_scale(self) -> np.ndarray: 334 | pass 335 | 336 | @property 337 | def fim_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 338 | pass 339 | 340 | @property 341 | def fim(self) -> Union[np.ndarray, dask.array.core.Array]: 342 | """ 343 | Full FIM 344 | 345 | :return: (features x inferred param x inferred param) 346 | """ 347 | fim_location_location = self.fim_location_location 348 | fim_scale_scale = self.fim_scale_scale 349 | fim_location_scale = self.fim_location_scale 350 | fim_ba = np.transpose(fim_location_scale, axes=[0, 2, 1]) 351 | return np.concatenate( 352 | [ 353 | np.concatenate([fim_location_location, fim_location_scale], axis=2), 354 | np.concatenate([fim_ba, fim_scale_scale], axis=2), 355 | ], 356 | axis=1, 357 | ) 358 | 359 | @abc.abstractmethod 360 | def fim_weight(self) -> Union[np.ndarray, dask.array.core.Array]: 361 | pass 362 | 363 | @property 364 | @abc.abstractmethod 365 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 366 | """ 367 | This is exactly W in (11) and in equation (7) as well and will be used as such in the 368 | calculation of the Jacobian. 
369 | """ 370 | pass 371 | 372 | @property 373 | @abc.abstractmethod 374 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]: 375 | pass 376 | 377 | @abc.abstractmethod 378 | def ll_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 379 | pass 380 | 381 | @property # type: ignore 382 | @dask_compute 383 | def ll_byfeature(self) -> np.ndarray: 384 | return np.sum(self.ll, axis=0) 385 | 386 | @dask_compute 387 | def ll_byfeature_j(self, j: Union[int, np.ndarray]) -> np.ndarray: 388 | return np.sum(self.ll_j(j=j), axis=0) 389 | 390 | @property 391 | @abc.abstractmethod 392 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]: 393 | """ 394 | This is Z in equation (8). 395 | """ 396 | pass 397 | 398 | @abc.abstractmethod 399 | def ybar_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 400 | """ 401 | This is Z in equation (8) indexed by j i.e the dependent variable of interest. 402 | """ 403 | pass 404 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/training_strategies.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TrainingStrategies(Enum): 5 | 6 | AUTO = None 7 | DEFAULT = [ 8 | { 9 | "max_steps": 1000, 10 | "method_scale": "brent", 11 | "update_scale_freq": 5, 12 | "ftol_scale": 1e-6, 13 | "max_iter_scale": 1000, 14 | }, 15 | ] 16 | GD = [ 17 | {"max_steps": 1000, "method_scale": "gd", "update_scale_freq": 5, "ftol_scale": 1e-6, "max_iter_scale": 100}, 18 | ] 19 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import Estimator 2 | from .model_container import ModelContainer 3 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/estimator.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Optional, Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from .external import EstimatorGlm, Model, init_par 7 | from .model_container import ModelContainer 8 | 9 | 10 | class Estimator(EstimatorGlm): 11 | """ 12 | Estimator for Generalized Linear Models (GLMs) with negative binomial noise. 13 | Uses the natural logarithm as linker function. 14 | 15 | Attributes 16 | ---------- 17 | model_vars : ModelVars 18 | model variables 19 | """ 20 | 21 | def __init__( 22 | self, 23 | model: Model, 24 | init_location: str = "AUTO", 25 | init_scale: str = "AUTO", 26 | # batch_size: Optional[Union[Tuple[int, int], int]] = None, 27 | quick_scale: bool = False, 28 | dtype: str = "float64", 29 | ): 30 | """ 31 | Performs initialisation and creates a new estimator. 32 | 33 | :param init_location: (Optional) 34 | Low-level initial values for a. Can be: 35 | 36 | - str: 37 | * "auto": automatically choose best initialization 38 | * "random": initialize with random values 39 | * "standard": initialize intercept with observed mean 40 | * "init_model": initialize with another model (see `ìnit_model` parameter) 41 | * "closed_form": try to initialize with closed form 42 | - np.ndarray: direct initialization of 'a' 43 | :param init_scale: (Optional) 44 | Low-level initial values for b. 
Can be: 45 | 46 | - str: 47 | * "auto": automatically choose best initialization 48 | * "random": initialize with random values 49 | * "standard": initialize with zeros 50 | * "init_model": initialize with another model (see `ìnit_model` parameter) 51 | * "closed_form": try to initialize with closed form 52 | - np.ndarray: direct initialization of 'b' 53 | :param quick_scale: bool 54 | Whether `scale` will be fitted faster and maybe less accurate. 55 | Useful in scenarios where fitting the exact `scale` is not absolutely necessary. 56 | :param dtype: Numerical precision. 57 | """ 58 | init_theta_location, init_theta_scale, train_loc, train_scale = init_par( 59 | model=model, init_location=init_location, init_scale=init_scale 60 | ) 61 | self._train_loc = train_loc 62 | self._train_scale = train_scale 63 | if quick_scale: 64 | self._train_scale = False 65 | sys.stdout.write("training location model: %s\n" % str(self._train_loc)) 66 | sys.stdout.write("training scale model: %s\n" % str(self._train_scale)) 67 | init_theta_location = init_theta_location.astype(dtype) 68 | init_theta_scale = init_theta_scale.astype(dtype) 69 | 70 | _model_container = ModelContainer( 71 | model=model, 72 | init_theta_location=init_theta_location, 73 | init_theta_scale=init_theta_scale, 74 | chunk_size_genes=model.chunk_size_genes, 75 | dtype=dtype, 76 | ) 77 | super(Estimator, self).__init__(model_container=_model_container, dtype=dtype) 78 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale 4 | from batchglm.models.glm_nb.model import Model 5 | from batchglm.models.glm_nb.utils import init_par 6 | 7 | # import necessary base_glm layers 8 | from batchglm.train.numpy.base_glm import EstimatorGlm, NumpyModelContainer 9 | from batchglm.utils.data import dask_compute 10 | from batchglm.utils.linalg import groupwise_solve_lm 11 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/model_container.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Union 2 | 3 | import dask 4 | import numpy as np 5 | import scipy 6 | 7 | from .external import NumpyModelContainer, dask_compute 8 | 9 | 10 | class ModelContainer(NumpyModelContainer): 11 | @property 12 | def fim_weight(self): 13 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.") 14 | 15 | @property 16 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 17 | """ 18 | Fisher inverse matrix weights 19 | :return: observations x features 20 | """ 21 | return self.location * self.scale / (self.scale + self.location) 22 | 23 | def fim_weight_location_location_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 24 | """ 25 | Fisher inverse matrix weights at j 26 | :return: observations x features 27 | """ 28 | return self.location_j(j=j) * self.scale_j(j=j) / (self.scale_j(j=j) + self.location_j(j=j)) 29 | 30 | @property 31 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]: 32 | """ 33 | :return: observations x features 34 | """ 35 | return np.asarray(self.x - self.location) / self.location 36 | 37 | def ybar_j(self, j: 
Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 38 | """ 39 | :return: observations x features 40 | """ 41 | # Make sure that dimensionality of sliced array is kept: 42 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 43 | j = np.full(1, j) 44 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array): 45 | return (self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j) 46 | else: 47 | return np.asarray(self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j) 48 | 49 | @property 50 | def jac_weight(self): 51 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.") 52 | 53 | @property 54 | def jac_weight_j(self): 55 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.") 56 | 57 | @property 58 | def jac_weight_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 59 | """ 60 | Scale model jacobian 61 | :return: observations x features 62 | """ 63 | scale = self.scale 64 | loc = self.location 65 | if isinstance(self.x, scipy.sparse.csr_matrix): 66 | scale_plus_x = np.asarray(scale + self.x) 67 | else: 68 | scale_plus_x = scale + self.x 69 | r_plus_mu = scale + loc 70 | 71 | # Define graphs for individual terms of constant term of hessian: 72 | const1 = scipy.special.digamma(scale_plus_x) - scipy.special.digamma(scale) 73 | const2 = -scale_plus_x / r_plus_mu 74 | const3 = np.log(scale) + np.ones_like(scale) - np.log(r_plus_mu) 75 | return scale * (const1 + const2 + const3) 76 | 77 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 78 | """ 79 | Scale model jacobian at location j 80 | :param j: Location 81 | :return: observations x features 82 | """ 83 | # Make sure that dimensionality of sliced array is kept: 84 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 85 | j = np.full(1, j) 86 | scale = self.scale_j(j=j) 87 | loc = self.location_j(j=j) 88 | if isinstance(self.x, scipy.sparse.csr_matrix): 89 | scale_plus_x = np.asarray(scale + self.x[:, j]) 90 | else: 91 | scale_plus_x = scale + self.x[:, j] 92 | r_plus_mu = scale + loc 93 | 94 | # Define graphs for individual terms of constant term of hessian: 95 | const1 = scipy.special.digamma(scale_plus_x) - scipy.special.digamma(scale) 96 | const2 = -scale_plus_x / r_plus_mu 97 | const3 = np.log(scale) + np.ones_like(scale) - np.log(r_plus_mu) 98 | return scale * (const1 + const2 + const3) 99 | 100 | @property 101 | def fim_location_scale(self) -> np.ndarray: 102 | """ 103 | Location-scale coefficient block of FIM 104 | 105 | The negative binomial model is not fit as whole with IRLS but only the location model. 106 | The location model is conditioned on the scale model estimates, which is why we only 107 | supply the FIM of the location model and return an empty FIM for scale model components. 108 | Note that there is also no closed form FIM for the scale-scale block. Returning a zero-array 109 | here leads to singular matrices for the whole location-scale FIM in some cases that throw 110 | linear algebra errors when inverted. 
111 |
112 | :return: (features x inferred param x inferred param)
113 | """
114 | return np.zeros([self.theta_scale.shape[1], 0, 0])
115 |
116 | @property
117 | def fim_scale_scale(self) -> np.ndarray:
118 | """
119 | Scale-scale coefficient block of FIM
120 |
121 | The negative binomial model is not fit as a whole with IRLS; only the location model is.
122 | The location model is conditioned on the scale model estimates, which is why we only
123 | supply the FIM of the location model and return an empty FIM for the scale model components.
124 | Note that there is also no closed-form FIM for the scale-scale block. Returning a zero-filled
125 | array here would in some cases make the whole location-scale FIM singular and throw
126 | linear algebra errors when inverted.
127 |
128 | :return: (features x inferred param x inferred param)
129 | """
130 | return np.zeros([self.theta_scale.shape[1], 0, 0])
131 |
132 | @property
133 | def hessian_weight_location_scale(self) -> np.ndarray:
134 | """scale-location block of the hessian matrix"""
135 | scale = self.scale
136 | loc = self.location
137 | return np.multiply(loc * scale, np.asarray(self.x - loc) / np.square(loc + scale))
138 |
139 | @property
140 | def hessian_weight_location_location(self) -> np.ndarray:
141 | """location-location block of the hessian matrix"""
142 | scale = self.scale
143 | loc = self.location
144 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
145 | x_by_scale_plus_one = self.x / scale + np.ones_like(scale)
146 | else:
147 | x_by_scale_plus_one = np.asarray(self.x.divide(scale) + np.ones_like(scale))
148 |
149 | return -loc * x_by_scale_plus_one / np.square((loc / scale) + np.ones_like(loc))
150 |
151 | @property
152 | def hessian_weight_scale_scale(self) -> np.ndarray:
153 | """scale-scale block of the hessian matrix"""
154 | scale = self.scale
155 | loc = self.location
156 | scale_plus_x = np.asarray(self.x + scale)
157 | scale_plus_loc = scale + loc
158 | # Define the individual terms of the constant part of the hessian:
159 | const1 = scipy.special.digamma(scale_plus_x) + scale * scipy.special.polygamma(n=1, x=scale_plus_x)
160 | const2 = -scipy.special.digamma(scale) + scale * scipy.special.polygamma(n=1, x=scale)
161 | const3 = -loc * scale_plus_x + np.ones_like(scale) * 2.0 * scale * scale_plus_loc / np.square(scale_plus_loc)
162 | const4 = np.log(scale) + np.ones_like(scale) * 2.0 - np.log(scale_plus_loc)
163 | return scale * (const1 + const2 + const3 + const4)
164 |
165 | @property
166 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]:
167 | """log-likelihood"""
168 | scale = self.scale
169 | loc = self.location
170 | log_r_plus_mu = np.log(scale + loc)
171 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
172 | # dense numpy or dask
173 | ll = (
174 | scipy.special.gammaln(scale + self.x)
175 | - scipy.special.gammaln(self.x + np.ones_like(scale))
176 | - scipy.special.gammaln(scale)
177 | + self.x * (self.eta_loc - log_r_plus_mu)
178 | + np.multiply(scale, self.eta_scale - log_r_plus_mu)
179 | )
180 | else:
181 | # sparse scipy
182 | ll = (
183 | scipy.special.gammaln(np.asarray(scale + self.x))
184 | - scipy.special.gammaln(self.x + np.ones_like(scale))
185 | - scipy.special.gammaln(scale)
186 | + np.asarray(
187 | self.x.multiply(self.eta_loc - log_r_plus_mu) + np.multiply(scale, self.eta_scale - log_r_plus_mu)
188 | )
189 | )
190 | ll = np.asarray(ll)
191 | return self.np_clip_param(ll, "ll")
192 |
193 | def ll_j(self, j: Union[int,
np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
194 | """
195 | Log likelihood for feature j
196 | :param j: Feature index
197 | """
198 | # Make sure that dimensionality of sliced array is kept:
199 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
200 | j = np.full(1, j)
201 | scale = self.scale_j(j=j)
202 | loc = self.location_j(j=j)
203 | log_r_plus_mu = np.log(scale + loc)
204 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
205 | # dense numpy or dask
206 | ll = (
207 | scipy.special.gammaln(scale + self.x[:, j])
208 | - scipy.special.gammaln(self.x[:, j] + np.ones_like(scale))
209 | - scipy.special.gammaln(scale)
210 | + self.x[:, j] * (self.eta_loc_j(j=j) - log_r_plus_mu)
211 | + np.multiply(scale, self.eta_scale_j(j=j) - log_r_plus_mu)
212 | )
213 | else:
214 | # sparse scipy
215 | ll = (
216 | scipy.special.gammaln(np.asarray(scale + self.x[:, j]))
217 | - scipy.special.gammaln(self.x[:, j] + np.ones_like(scale))  # slice column j, as in the dense branch
218 | - scipy.special.gammaln(scale)
219 | + np.asarray(
220 | self.x[:, j].multiply(self.eta_loc_j(j=j) - log_r_plus_mu)
221 | + np.multiply(scale, self.eta_scale_j(j=j) - log_r_plus_mu)
222 | )
223 | )
224 | ll = np.asarray(ll)
225 | return self.np_clip_param(ll, "ll")
226 |
227 | def ll_handle(self) -> Callable:
228 | def fun(x, eta_loc, theta_scale, xh_scale):
229 | eta_scale = np.matmul(xh_scale, theta_scale)
230 | scale = np.exp(eta_scale)
231 | loc = np.exp(eta_loc)
232 | log_r_plus_mu = np.log(scale + loc)
233 | if isinstance(x, np.ndarray) or isinstance(x, dask.array.core.Array):
234 | # dense numpy or dask
235 | ll = (
236 | scipy.special.gammaln(scale + x)
237 | - scipy.special.gammaln(x + np.ones_like(scale))
238 | - scipy.special.gammaln(scale)
239 | + x * (eta_loc - log_r_plus_mu)
240 | + np.multiply(scale, eta_scale - log_r_plus_mu)
241 | )
242 | else:
243 | raise ValueError("type x %s not supported" % type(x))
244 | return self.np_clip_param(ll, "ll")
245 |
246 | return fun
247 |
248 | def jac_scale_handle(self) -> Callable:
249 | def fun(x, eta_loc, theta_scale, xh_scale):
250 | scale = np.exp(np.matmul(xh_scale, theta_scale))  # map scale coefficients through the design, mirroring ll_handle
251 | loc = np.exp(eta_loc)
252 | scale_plus_x = scale + x
253 | r_plus_mu = scale + loc
254 |
255 | # Define the individual terms of the constant part of the jacobian:
256 | const1 = scipy.special.digamma(scale_plus_x) - scipy.special.digamma(scale)
257 | const2 = -scale_plus_x / r_plus_mu
258 | const3 = np.log(scale) + np.ones_like(scale) - np.log(r_plus_mu)
259 | return scale * (const1 + const2 + const3)
260 |
261 | return fun
262 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/__init__.py: --------------------------------------------------------------------------------
1 | from .estimator import Estimator
2 | from .model_container import ModelContainer
3 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/estimator.py: --------------------------------------------------------------------------------
1 | import logging
2 |
3 | import numpy as np
4 |
5 | from .external import EstimatorGlm, Model, init_par
6 | from .model_container import ModelContainer
7 |
8 | logger = logging.getLogger("batchglm")
9 |
10 |
11 | class Estimator(EstimatorGlm):
12 | def __init__(
13 | self,
14 | model: Model,
15 | init_location: str = "AUTO",
16 | init_scale: str = "AUTO",
17 | # batch_size: Optional[Union[Tuple[int, int], int]] = None,
18 | quick_scale: bool = False,
19 | dtype:
str = "float64",
20 | ):
21 | """
22 | Performs initialisation and creates a new estimator.
23 | :param model:
24 | The GLM model to be fit
25 | :param init_location: (Optional)
26 | Low-level initial values for a. Can be:
27 |
28 | - str:
29 | * "auto": automatically choose best initialization
30 | * "standard": initialize intercept with observed mean
31 | * "closed_form": try to initialize with closed form
32 | - np.ndarray: direct initialization of 'a'
33 | :param init_scale: (Optional)
34 | Low-level initial values for b. Can be:
35 |
36 | - str:
37 | * "auto": automatically choose best initialization
38 | * "random": initialize with random values
39 | * "standard": initialize with zeros
40 | * "closed_form": try to initialize with closed form
41 | - np.ndarray: direct initialization of 'b'
42 | :param quick_scale: bool
43 | Whether `scale` will be fitted faster, possibly at the cost of accuracy.
44 | Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
45 | :param dtype: Numerical precision.
46 | """
47 | init_theta_location, init_theta_scale, train_loc, train_scale = init_par(
48 | model=model, init_location=init_location, init_scale=init_scale
49 | )
50 | init_theta_location = init_theta_location.astype(dtype)
51 | init_theta_scale = init_theta_scale.astype(dtype)
52 | self._train_scale = train_scale
53 | self._train_loc = train_loc
54 | if quick_scale:
55 | self._train_scale = False
56 | _model_container = ModelContainer(
57 | model=model,
58 | init_theta_location=init_theta_location,
59 | init_theta_scale=init_theta_scale,
60 | chunk_size_genes=model.chunk_size_genes,
61 | dtype=dtype,
62 | )
63 | super(Estimator, self).__init__(model_container=_model_container, dtype=dtype)
64 |
65 | def train(
66 | self,
67 | **kwargs,
68 | ):
69 | model = self._model_container.model
70 | if self._train_loc:
71 | theta_location, _, _, _ = np.linalg.lstsq(model.design_loc, model.x, rcond=None)
72 | self._model_container.theta_location = theta_location
73 | self._train_loc = False
74 | super().train(**kwargs)
75 | self._train_loc = True
76 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/external.py: --------------------------------------------------------------------------------
1 | import batchglm.utils.data as data_utils
2 | from batchglm import pkg_constants
3 | from batchglm.models.base_glm.utils import closedform_glm_scale
4 | from batchglm.models.glm_norm.model import Model
5 | from batchglm.models.glm_norm.utils import closedform_norm_glm_logsd, init_par
6 |
7 | # import necessary base_glm layers
8 | from batchglm.train.numpy.base_glm import EstimatorGlm, NumpyModelContainer
9 | from batchglm.utils.linalg import groupwise_solve_lm
10 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/model_container.py: --------------------------------------------------------------------------------
1 | import math
2 | from typing import Callable, Union
3 |
4 | import dask
5 | import numpy as np
6 |
7 | from .external import NumpyModelContainer
8 |
9 |
10 | def ll(scale, loc, x):
11 | resid = loc - x
12 | ll = -0.5 * np.log(2 * math.pi) - np.log(scale) - 0.5 * np.power(resid / scale, 2)
13 | return ll
14 |
15 |
16 | class ModelContainer(NumpyModelContainer):
17 | @property
18 | def fim_weight(self):
19 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
20 |
21 | @property
22 | def jac_weight(self):
23 | raise
NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
24 |
25 | @property
26 | def jac_weight_j(self):
27 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
28 |
29 | @property
30 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]:
31 | """
32 | :return: observations x features
33 | """
34 | return np.asarray(self.x - self.location)
35 |
36 | def ybar_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
37 | """
38 | :return: observations x features
39 | """
40 | # Make sure that dimensionality of sliced array is kept:
41 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
42 | j = np.full(1, j)
43 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
44 | return self.x[:, j] - self.location_j(j=j)  # plain residuals, consistent with `ybar` above
45 | else:
46 | return np.asarray(self.x[:, j] - self.location_j(j=j))
47 |
48 | @property
49 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]:
50 | return 1 / np.power(self.scale, 2)
51 |
52 | def fim_weight_location_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
53 | return 1 / (self.scale_j(j=j) * self.scale_j(j=j))
54 |
55 | @property
56 | def jac_weight_scale(self) -> Union[np.ndarray, dask.array.core.Array]:
57 | return -np.ones_like(self.x) - np.power((self.x - self.location) / self.scale, 2)
58 |
59 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
60 | return -np.ones_like(self.x[:, j]) - np.power((self.x[:, j] - self.location_j(j=j)) / self.scale_j(j=j), 2)
61 |
62 | @property
63 | def fim_location_scale(self) -> np.ndarray:
64 | return np.zeros([self.model.x.shape[1], self.theta_location.shape[0], self.theta_scale.shape[0]])
65 |
66 | @property
67 | def fim_weight_scale_scale(self) -> np.ndarray:
68 | return np.full(self.scale.shape, 2)
69 |
70 | @property
71 | def fim_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]:
72 | """
73 |
74 | :return: (features x inferred param x inferred param)
75 | """
76 | w = self.fim_weight_scale_scale
77 | xh = self.xh_scale
78 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh)
79 |
80 | @property
81 | def hessian_weight_location_scale(self) -> np.ndarray:
82 | scale = self.scale
83 | loc = self.location
84 | return (2 / np.power(scale, 2)) * (self.x - loc)
85 |
86 | @property
87 | def hessian_weight_location_location(self) -> np.ndarray:
88 | scale = self.scale
89 | return -1 / np.power(scale, 2)
90 |
91 | @property
92 | def hessian_weight_scale_scale(self) -> np.ndarray:
93 | scale = self.scale
94 | loc = self.location
95 | return (2 / np.power(scale, 2)) * np.power(self.x - loc, 2)
96 |
97 | @property
98 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]:
99 | loc = self.location
100 | scale = self.scale
101 | x = self.model.x
102 | return np.asarray(ll(scale, loc, x))
103 |
104 | def ll_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
105 | # Make sure that dimensionality of sliced array is kept:
106 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
107 | j = np.full(1, j)
108 |
109 | loc = self.location_j(j=j)
110 | scale = self.scale_j(j=j)
111 | resid = loc - self.model.x[:, j]
112 | ll = -0.5 * np.log(2 * math.pi) - np.log(scale) - 0.5 * np.power(resid / scale, 2)
113 | return ll
114 |
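# Note on `ll_handle` below: it returns a standalone closure that recomputes the
# log-likelihood from raw inputs via the model's inverse link functions, so the
# likelihood can be re-evaluated for candidate scale coefficients without mutating
# this container. A minimal usage sketch (variable names are illustrative only,
# assuming a populated container `mc`):
#
#   fn = mc.ll_handle()
#   ll_new = fn(mc.x, mc.eta_loc, mc.theta_scale, mc.xh_scale)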
115 | def ll_handle(self) -> Callable:
116 | def fun(x, eta_loc, theta_scale, xh_scale):
117 | eta_scale = np.matmul(xh_scale, theta_scale)
118 | scale = self.model.inverse_link_scale(eta_scale)
119 | loc = self.model.inverse_link_loc(eta_loc)
120 | return ll(scale, loc, x)
121 |
122 | return fun
123 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/batchglm/b893fd0ce020669ff38583e4ec135b10926093ae/batchglm/train/numpy/glm_norm/utils.py --------------------------------------------------------------------------------
/batchglm/train/numpy/glm_poisson/__init__.py: --------------------------------------------------------------------------------
1 | from .estimator import Estimator
2 | from .model_container import ModelContainer
3 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/estimator.py: --------------------------------------------------------------------------------
1 | import sys
2 | from typing import Optional, Tuple, Union
3 |
4 | import numpy as np
5 |
6 | from .external import EstimatorGlm, Model, init_par
7 | from .model_container import ModelContainer
8 |
9 |
10 | class Estimator(EstimatorGlm):
11 | """
12 | Estimator for Generalized Linear Models (GLMs) with Poisson noise.
13 | Uses the natural logarithm as link function.
14 |
15 | Attributes
16 | ----------
17 | model_vars : ModelVars
18 | model variables
19 | """
20 |
21 | def __init__(
22 | self,
23 | model: Model,
24 | init_location: str = "AUTO",
25 | init_scale: str = "AUTO",
26 | # batch_size: Optional[Union[Tuple[int, int], int]] = None,
27 | quick_scale: bool = False,
28 | dtype: str = "float64",
29 | ):
30 | """
31 | Performs initialisation and creates a new estimator.
32 |
33 | :param init_location: (Optional)
34 | Low-level initial values for a. Can be:
35 |
36 | - str:
37 | * "auto": automatically choose best initialization
38 | * "standard": initialize intercept with observed mean
39 | * "init_model": initialize with another model (see `init_model` parameter)
40 | * "closed_form": try to initialize with closed form
41 | - np.ndarray: direct initialization of 'a'
42 | :param dtype: Numerical precision.
43 | """
44 | init_theta_location, _, train_loc, _ = init_par(model=model, init_location=init_location)
45 | self._train_loc = train_loc
46 | # no need to train the scale parameter for the Poisson model since the distribution has a single parameter
47 | self._train_scale = False
48 | sys.stdout.write("training location model: %s\n" % str(self._train_loc))
49 | init_theta_location = init_theta_location.astype(dtype)
50 |
51 | _model_container = ModelContainer(
52 | model=model,
53 | init_theta_location=init_theta_location,
54 | init_theta_scale=init_theta_location,  # Not used.
55 | chunk_size_genes=model.chunk_size_genes,
56 | dtype=dtype,
57 | )
58 | super(Estimator, self).__init__(model_container=_model_container, dtype=dtype)
59 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/exceptions.py: --------------------------------------------------------------------------------
1 | class NoScaleError(Exception):
2 | """
3 | Exception raised for attempting to access the scale parameter (or one of its derived methods) of a Poisson model.
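For example, accessing ``fim_location_scale`` or calling ``jac_weight_scale_j`` on a Poisson model container raises this error.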
4 | """
5 |
6 | def __init__(self, method: str):
7 | self.message = f"Attempted to access {method}. No scale parameter is fit for the Poisson model - please use location."
8 | super().__init__(self.message)
9 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/external.py: --------------------------------------------------------------------------------
1 | import batchglm.utils.data as data_utils
2 | from batchglm import pkg_constants
3 | from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale
4 | from batchglm.models.glm_poisson.model import Model
5 | from batchglm.models.glm_poisson.utils import init_par
6 |
7 | # import necessary base_glm layers
8 | from batchglm.train.numpy.base_glm import EstimatorGlm, NumpyModelContainer
9 | from batchglm.utils.linalg import groupwise_solve_lm
10 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/model_container.py: --------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import dask
4 | import numpy as np
5 | import scipy
6 |
7 | from .exceptions import NoScaleError
8 | from .external import NumpyModelContainer
9 |
10 |
11 | class ModelContainer(NumpyModelContainer):
12 | @property
13 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]:
14 | """
15 | Fisher information matrix weights
16 | :return: observations x features
17 | """
18 | return -self.hessian_weight_location_location
19 |
20 | @property
21 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]:
22 | """
23 | :return: observations x features
24 | """
25 | return np.asarray(self.x - self.location) / self.location
26 |
27 | def fim_weight_location_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
28 | """
29 | Fisher information matrix weights at feature j
30 | :return: observations x features
31 | """
32 | return self.location_j(j=j)
33 |
34 | def ybar_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
35 | """
36 | :return: observations x features
37 | """
38 | # Make sure that dimensionality of sliced array is kept:
39 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
40 | j = np.full(1, j)
41 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
42 | return (self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j)
43 | else:
44 | return np.asarray(self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j)
45 |
46 | @property
47 | def hessian_weight_location_location(self) -> np.ndarray:
48 | """location-location block of the hessian matrix"""
49 | return -self.location
50 |
51 | @property
52 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]:
53 | """log-likelihood"""
54 | loc = self.location
55 | log_loc = np.log(loc)
56 | x_times_log_loc = self.x * log_loc
57 | log_x_factorial = scipy.special.gammaln(self.x + np.ones_like(self.x))  # gammaln(x + 1) = log(x!)
58 | ll = x_times_log_loc - loc - log_x_factorial
59 | return np.asarray(self.np_clip_param(ll, "ll"))
60 |
61 | def ll_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
62 | """
63 | Log likelihood for feature j
64 | :param j: Feature index
65 | """
66 | # Make sure that dimensionality of sliced array is kept:
67 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
68 | j = np.full(1, j)
69 | loc_j = self.location_j(j=j)
70 |
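# Poisson log-likelihood per observation and feature: ll = x * log(mu) - mu - log(x!), where log(x!) = gammaln(x + 1):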
log_loc = np.log(loc_j)
71 | x_times_log_loc = self.x[:, j] * log_loc
72 | log_x_factorial = scipy.special.gammaln(self.x[:, j] + np.ones_like(self.x[:, j]))  # gammaln(x + 1) = log(x!)
73 | ll = x_times_log_loc - loc_j - log_x_factorial
74 | return np.asarray(self.np_clip_param(ll, "ll"))
75 |
76 | @property
77 | def hessian(self) -> Union[np.ndarray, dask.array.core.Array]:
78 | return self.hessian_location_location
79 |
80 | @property
81 | def fim_weight(self):
82 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
83 |
84 | @property
85 | def jac_weight(self):
86 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
87 |
88 | def jac_weight_j(self, j: Union[int, np.ndarray]):
89 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
90 |
91 | # Methods marked as abstract that involve the scale parameter:
92 | @property
93 | def fim_location_scale(self):
94 | raise NoScaleError("fim_location_scale")
95 |
96 | @property
97 | def hessian_weight_scale_scale(self):
98 | raise NoScaleError("hessian_weight_scale_scale")
99 |
100 | @property
101 | def hessian_weight_location_scale(self):
102 | raise NoScaleError("hessian_weight_location_scale")
103 |
104 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]):
105 | raise NoScaleError("jac_weight_scale_j")
106 |
107 | @property
108 | def fim(self) -> Union[np.ndarray, dask.array.core.Array]:
109 | return self.fim_location_location
110 |
111 | @property
112 | def jac(self) -> Union[np.ndarray, dask.array.core.Array]:
113 | return self.jac_location
114 |
-------------------------------------------------------------------------------- /batchglm/utils/__init__.py: --------------------------------------------------------------------------------
1 | from . import data, input, plotting
2 |
-------------------------------------------------------------------------------- /batchglm/utils/linalg.py: --------------------------------------------------------------------------------
1 | import logging
2 | from typing import Callable, Union
3 |
4 | import dask.array
5 | import numpy as np
6 |
7 | logger = logging.getLogger("batchglm")
8 |
9 |
10 | def stacked_lstsq(L: Union[np.ndarray, dask.array.core.Array], b: np.ndarray, rcond: float = 1e-10):
11 | r"""
12 | Solve `Lx = b` via SVD least squares, cutting off small singular values
13 |
14 | :param L: tensor of shape (..., M, K)
15 | :param b: tensor of shape (..., M, N).
16 | :param rcond: relative threshold below which singular values
17 | are cut off (treated as zero in the pseudo-inverse)
18 | :return: x of shape (..., K, N)
19 | """
20 | u, s, v = np.linalg.svd(L, full_matrices=False)
21 | s_max = s.max(axis=-1, keepdims=True)
22 | s_min = rcond * s_max
23 |
24 | inv_s = np.reciprocal(s, out=np.zeros_like(s), where=s >= s_min)
25 |
26 | x = np.einsum("...MK,...MN->...KN", v, np.einsum("...K,...MK,...MN->...KN", inv_s, u, b))
27 |
28 | # rank = np.sum(s > rcond)
29 |
30 | return np.conj(x, out=x)
31 |
32 |
33 | def groupwise_solve_lm(
34 | dmat: Union[np.ndarray, dask.array.core.Array],
35 | apply_fun: Callable,
36 | constraints: Union[np.ndarray, dask.array.core.Array],
37 | ):
38 | r"""
39 | Solve GLMs by estimating the distribution parameters of each unique group of observations independently and
40 | then solving for the design matrix `dmat`.
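Each unique row of `dmat` defines one group of observations; the group-wise estimates are then mapped back onto the model coefficients via a small least-squares solve, as sketched below.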
41 |
42 | Idea:
43 | $$
44 | \theta &= f(x) \\
45 | \Rightarrow f^{-1}(\theta) &= x \\
46 | &= (D \cdot D^{+}) \cdot x \\
47 | &= D \cdot (D^{+} \cdot x) \\
48 | &= D \cdot x' = f^{-1}(\theta)
49 | $$
50 |
51 | :param dmat: design matrix which should be solved for
52 | :param apply_fun: some callable function taking one grouping vector argument.
53 | Should compute a group-wise parameter solution.
54 |
55 | Example method calculating group-wise means:
56 | ::
57 | def apply_fun(grouping):
58 | groupwise_means = data.groupby(grouping).mean(dim="observations").values
59 |
60 | return np.log(groupwise_means)
61 |
62 | The `grouping` argument passed to `apply_fun` assigns each observation to one unique row of `dmat`.
63 | :param constraints: tensor (all parameters x dependent parameters)
64 | Tensor that encodes how the complete parameter set, which includes dependent
65 | parameters, arises from the independent parameters: all = constraints @ indep.
66 | This form of constraints is used in vector generalized linear models (VGLMs).
67 |
68 | :return: tuple of (apply_fun(grouping), x_prime, rmsd, rank, s) where x_prime is the parameter matrix solved for
69 | `dmat`.
70 | """
71 | # Get unique rows of design matrix and vector with group assignments:
72 | if isinstance(dmat, dask.array.core.Array):  # axis argument not supported by dask in .unique()
73 | unique_design, inverse_idx = np.unique(dmat.compute(), axis=0, return_inverse=True)
74 | unique_design = dask.array.from_array(unique_design, chunks=unique_design.shape)
75 | else:
76 | unique_design, inverse_idx = np.unique(dmat, axis=0, return_inverse=True)
77 | if unique_design.shape[0] > 500:
78 | raise ValueError("large least-squares problem in init, likely defined a numeric predictor as categorical")
79 |
80 | full_rank = constraints.shape[1]
81 | unique_constrained_dmat = np.matmul(unique_design, constraints)
82 | if isinstance(unique_constrained_dmat, dask.array.core.Array):  # matrix_rank not supported by dask
83 | rank = np.linalg.matrix_rank(unique_constrained_dmat.compute())
84 | else:
85 | rank = np.linalg.matrix_rank(unique_constrained_dmat)
86 | if full_rank > rank:
87 | logger.error("model is not full rank!")
88 |
89 | # Get group-wise means in linker space based on group assignments
90 | # based on unique rows of design matrix:
91 | params = apply_fun(inverse_idx)
92 |
93 | # Use least-squares solver to compute model parameterization
94 | # accounting for dependent parameters, i.e. degrees of freedom
95 | # of the model which appear as groups in the design matrix
96 | # and are not accounted for by parameters but which are
97 | # accounted for by constraints:
98 | # (unique_design @ constraints) @ theta = means  ->  least-squares solve for theta
99 | # (This is faster and more accurate than using matrix inversion.)
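# Shape sketch, assuming G unique design rows (groups), P dependent parameters
# and F features: unique_constrained_dmat is (G, P) and params is (G, F), so the
# least-squares solution x_prime below has shape (P, F).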
100 | logger.debug(" ** Solve lstsq problem")
101 | if np.any(np.isnan(params)):
102 | raise ValueError("some entries of params are nan; np.linalg.lstsq would fail")
103 | x_prime, rmsd, rank, s = np.linalg.lstsq(unique_constrained_dmat, params, rcond=None)
104 |
105 | return params, x_prime, rmsd, rank, s
106 |
-------------------------------------------------------------------------------- /batchglm/utils/plotting.py: --------------------------------------------------------------------------------
1 | import logging
2 | from typing import Optional, Tuple, Union
3 |
4 | import dask.array
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import pandas as pd
8 | import seaborn as sns
9 | from matplotlib import gridspec, rcParams
10 | from matplotlib.axes import Axes
11 |
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | def _input_checks(
16 | true_values: Union[np.ndarray, dask.array.core.Array], pred_values: Union[np.ndarray, dask.array.core.Array]
17 | ):
18 | """
19 | Check the type of true and predicted input and make sure they have the same size.
20 |
21 | :param true_values: The reference parameters.
22 | :param pred_values: The fitted parameters.
23 | """
24 |
25 | def _cast(data: Union[np.ndarray, dask.array.core.Array]) -> np.ndarray:
26 | if isinstance(data, dask.array.core.Array):
27 | to_return = data.compute()
28 | elif isinstance(data, np.ndarray):
29 | to_return = data
30 | else:
31 | raise TypeError(f"Type {type(data)} is not recognized for true/pred values.")
32 | return to_return
33 |
34 | true_vals = _cast(true_values)
35 | pred_vals = _cast(pred_values)
36 |
37 | assert len(true_vals.shape) == len(pred_vals.shape), "true_values must have same dimensions as pred_values"
38 | assert np.all(true_vals.shape == pred_vals.shape), "true_values must have same dimensions as pred_values"
39 |
40 | return true_vals, pred_vals
41 |
42 |
43 | def plot_coef_vs_ref(
44 | true_values: Union[np.ndarray, dask.array.core.Array],
45 | pred_values: Union[np.ndarray, dask.array.core.Array],
46 | size=1,
47 | log=False,
48 | save=None,
49 | show=True,
50 | ncols=5,
51 | row_gap=0.3,
52 | col_gap=0.25,
53 | title: str = "",
54 | return_axs: bool = False,
55 | ) -> Optional[Axes]:
56 | """
57 | Plot estimated coefficients against reference (true) coefficients for location model.
58 |
59 | :param true_values: The reference (true) parameters, shaped parameters x features.
60 | :param size: Point size.
61 | :param save: Path+file name stem to save plots to.
62 | File will be save+"_genes.png". Does not save if save is None.
63 | :param show: Whether to display plot.
64 | :param ncols: Number of columns in plot grid if multiple genes are plotted.
65 | :param row_gap: Vertical gap between panel rows relative to panel height.
66 | :param col_gap: Horizontal gap between panel columns relative to panel width.
67 | :param title: Plot title.
68 | :param return_axs: Whether to return axis objects.
69 | :return: Matplotlib axis objects.
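:param pred_values: The fitted parameters; must have the same shape as true_values.
:param log: Whether to log-transform both coordinates (np.log(x + 1)) before plotting.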
70 | """
71 | true_values, pred_values = _input_checks(true_values, pred_values)
72 |
73 | plt.ioff()
74 |
75 | n_par = true_values.shape[0]
76 | ncols = ncols if n_par > ncols else n_par
77 | nrows = n_par // ncols + int(n_par % ncols > 0)  # ceil(n_par / ncols)
78 |
79 | gs = gridspec.GridSpec(nrows=nrows, ncols=ncols, hspace=row_gap, wspace=col_gap)
80 |
81 | fig = plt.figure(
82 | figsize=(
83 | ncols * rcParams["figure.figsize"][0],  # width in inches
84 | nrows * rcParams["figure.figsize"][1] * (1 + row_gap),  # height in inches
85 | )
86 | )
87 |
88 | if title is None:
89 | title = "parameter"
90 |
91 | # Build axis objects in loop.
92 | axs = []
93 | for i in range(n_par):
94 | ax = plt.subplot(gs[i])
95 | axs.append(ax)
96 |
97 | x = true_values[i, :]
98 | y = pred_values[i, :]
99 | if log:
100 | x = np.log(x + 1)
101 | y = np.log(y + 1)
102 |
103 | sns.scatterplot(x=x, y=y, size=size, ax=ax, legend=False)
104 | sns.lineplot(
105 | x=np.array([np.min([np.min(x), np.min(y)]), np.max([np.max(x), np.max(y)])]),
106 | y=np.array([np.min([np.min(x), np.min(y)]), np.max([np.max(x), np.max(y)])]),
107 | ax=ax,
108 | )
109 |
110 | title_i = title + "_" + str(i)
111 | # Add correlation into title:
112 | title_i = title_i + " (R=" + str(np.round(np.corrcoef(x, y)[0, 1], 3)) + ")"
113 | ax.set_title(title_i)
114 | ax.set_xlabel("true parameter")
115 | ax.set_ylabel("estimated parameter")
116 |
117 | # Save, show and return figure.
118 | if save is not None:
119 | plt.savefig(save + "_parameter_scatter.png")
120 |
121 | if show:
122 | plt.show()
123 |
124 | plt.close(fig)
125 | plt.ion()
126 |
127 | if return_axs:
128 | return axs
129 | return None
130 |
131 |
132 | def plot_deviation(
133 | true_values: np.ndarray, pred_values: np.ndarray, save=None, show=True, return_axs=False, title: str = ""
134 | ) -> Optional[Axes]:
135 | """
136 | Plot deviation of estimated coefficients from reference (true) coefficients
137 | as violin plot for location model.
138 |
139 | :param true_values: The reference (true) parameters, shaped parameters x features.
140 | :param pred_values: The fitted parameters; must have the same shape as true_values.
141 | :param save: Path+file name stem to save plots to.
142 | File will be save+"_genes.png". Does not save if save is None.
143 | :param show: Whether to display plot.
144 | :param return_axs: Whether to return axis objects.
145 | :param title: Title.
146 | :return: Matplotlib axis objects.
147 | """
148 | true_values, pred_values = _input_checks(true_values, pred_values)
149 |
150 | plt.ioff()
151 |
152 | n_par = true_values.shape[0]
153 | summary_fit = pd.concat(
154 | [
155 | pd.DataFrame(
156 | {
157 | "deviation": pred_values[i, :] - true_values[i, :],
158 | "coefficient": pd.Series(["coef_" + str(i) for x in range(pred_values.shape[1])], dtype="category"),
159 | }
160 | )
161 | for i in range(n_par)
162 | ]
163 | )
164 | summary_fit["coefficient"] = summary_fit["coefficient"].astype("category")
165 |
166 | fig, ax = plt.subplots()
167 | sns.violinplot(x=summary_fit["coefficient"], y=summary_fit["deviation"], ax=ax)
168 |
169 | if title is not None:
170 | ax.set_title(title)
171 |
172 | # Save, show and return figure.
173 | if save is not None: 174 | plt.savefig(save + "_deviation_violin.png") 175 | 176 | if show: 177 | plt.show() 178 | 179 | plt.close(fig) 180 | plt.ion() 181 | 182 | if return_axs: 183 | return ax 184 | return None 185 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | target: auto 7 | patch: 8 | default: 9 | target: auto 10 | -------------------------------------------------------------------------------- /cookietemple.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.7.4 3 | 4 | [bumpversion_files_whitelisted] 5 | init_file = batchglm/__init__.py 6 | dot_cookietemple = .cookietemple.yml 7 | conf_py = docs/conf.py 8 | main_file = batchglm/__main__.py 9 | 10 | [bumpversion_files_blacklisted] 11 | poetry = pyproject.toml 12 | release_drafter_config = .github/release-drafter.yml 13 | 14 | [sync] 15 | sync_enabled = True 16 | 17 | [sync_level] 18 | ct_sync_level = minor 19 | 20 | [sync_files_blacklisted] 21 | changelog = CHANGELOG.rst 22 | poetry_lock = poetry.lock 23 | poetry = pyproject.toml 24 | tests = tests/**/* 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = batchglm 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
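# For example, `make html` forwards the `html` target to sphinx-build's make mode.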
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
-------------------------------------------------------------------------------- /docs/_static/css/custom.css: --------------------------------------------------------------------------------
1 | .small {
2 | font-size: 40%;
3 | }
4 |
5 | .rst-content dl:not(.docutils) dl dt {
6 | /* mimic numpydoc’s blockquote style */
7 | font-weight: normal;
8 | background: none transparent;
9 | border-left: none;
10 | margin: 0 0 12px;
11 | padding: 3px 0 0;
12 | font-size: 100%;
13 | }
14 |
15 | .rst-content dl:not(.docutils) dl dt code {
16 | font-size: 100%;
17 | font-weight: normal;
18 | background: none transparent;
19 | border: none;
20 | padding: 0 2px;
21 | }
22 |
23 | .rst-content dl:not(.docutils) dl dt a.reference > code {
24 | text-decoration: underline;
25 | }
26 |
-------------------------------------------------------------------------------- /docs/_static/custom_cookietemple.css: --------------------------------------------------------------------------------
1 | @import "basic.css";
2 |
3 | /*Set max width to none so the theme uses all available width*/
4 | .wy-nav-content {
5 | max-width: none;
6 | }
7 |
-------------------------------------------------------------------------------- /docs/api/.gitignore: --------------------------------------------------------------------------------
1 | batchglm.*
2 |
-------------------------------------------------------------------------------- /docs/api/index.rst: --------------------------------------------------------------------------------
1 | .. automodule:: batchglm
2 |
3 | API
4 | ===
5 |
6 |
7 | Import batchglm's high-level API as::
8 |
9 | import batchglm.api as glm
10 |
11 |
12 | Fitting models
13 | -----------------------------------
14 |
15 | All models are collected in the :mod:`train` and `model` module.
16 | Each model consists of at least:
17 |
18 | 1) a `models.glm_nb.Model` class, which basically describes the model
19 | 2) a `train.xxxxx.Estimator` class, which takes a `Model` object and fits the corresponding model onto it.
20 |
21 | where `xxxxx` is the desired backend, like `tf2`, `numpy` or `statsmodels`.
22 |
23 | For example, here is a short snippet to give a sense of how the API might work::
24 |
25 | from batchglm.models.glm_nb import Model as NBModel
26 | from batchglm.train.numpy.glm_nb import Estimator as NBEstimator
27 | from batchglm.utils.input import InputDataGLM
28 |
29 | input_data = InputDataGLM(data=data_matrix, design_loc=_design_loc, design_scale=_design_scale, as_dask=as_dask)
30 | model = NBModel(input_data=input_data)
31 | estimator = NBEstimator(model=model, init_location="standard", init_scale="standard")
32 | estimator.initialize()
33 | estimator.train_sequence(training_strategy="DEFAULT")
34 | # Now you can perform statistical tests, for example, on parameters like model.theta_location.
35 |
36 | Currently implemented models:
37 |
38 | Negative Binomial
39 | ~~~~~~~~~~~~~~~~~
40 |
41 | .. autosummary::
42 | :toctree: .
43 |
44 | models.glm_nb.Model
45 | train.numpy.glm_nb.Estimator
46 |
47 | Normal
48 | ~~~~~~~~~~~~~~~~~
49 | .. autosummary::
50 | :toctree: .
51 |
52 | models.glm_norm.Model
53 | train.numpy.glm_norm.Estimator
54 |
55 | Poisson
56 | ~~~~~~~~~~~~~~~~~
57 | .. autosummary::
58 | :toctree: .
59 |
60 | models.glm_poisson.Model
61 | train.numpy.glm_poisson.Estimator
62 |
63 | Planned or Incomplete Models:
64 |
65 | Beta
66 | ~~~~~~~~~~~~~~~~~
67 |
68 | Data Utilities
69 | -----------------------------------
70 | We also provide some data utilities for working with things like design and constraint matrices.
71 |
72 | .. autosummary::
73 | :toctree: .
74 |
75 | utils.data.bin_continuous_covariate
76 | utils.data.constraint_matrix_from_string
77 | utils.data.constraint_system_from_star
78 | utils.data.design_matrix
79 | utils.data.preview_coef_names
80 | utils.data.string_constraints_from_dict
81 | utils.data.view_coef_names
82 | utils.input.InputDataGLM
83 |
-------------------------------------------------------------------------------- /docs/authors.rst: --------------------------------------------------------------------------------
1 | =======
2 | Credits
3 | =======
4 |
5 | Development Lead
6 | ----------------
7 |
8 | * Mario Picciani
9 |
10 | Contributors
11 | ------------
12 |
13 | None yet. Why not be the first?
14 |
-------------------------------------------------------------------------------- /docs/code_of_conduct.rst: --------------------------------------------------------------------------------
1 | .. include:: ../CODE_OF_CONDUCT.rst
2 |
-------------------------------------------------------------------------------- /docs/conf.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # mypy: ignore-errors
3 | # batchglm documentation build configuration file
4 | #
5 | # If extensions (or modules to document with autodoc) are in another
6 | # directory, add these directories to sys.path here. If the directory is
7 | # relative to the documentation root, use os.path.abspath to make it
8 | # absolute, like shown here.
9 | #
10 | import os
11 | import sys
12 |
13 | sys.path.insert(0, os.path.abspath(".."))
14 |
15 |
16 | # -- General configuration ---------------------------------------------
17 |
18 | # If your documentation needs a minimal Sphinx version, state it here.
19 | # needs_sphinx = '1.0'
20 |
21 | # Add any Sphinx extension module names here, as strings. They can be
22 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
23 |
24 | # Add 'sphinx_automodapi.automodapi' if you want to build modules
25 | extensions = [
26 | "sphinx.ext.viewcode",
27 | "sphinx.ext.autodoc",
28 | "sphinx.ext.autosummary",
29 | "sphinx.ext.napoleon",
30 | "sphinx_click",
31 | "sphinx_rtd_dark_mode",
32 | ]
33 |
34 | default_dark_mode = True
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ["_templates"]
38 |
39 | # The suffix(es) of source filenames.
40 | source_suffix = ".rst"
41 |
42 | # The master toctree document.
43 | master_doc = "index"
44 |
45 | # General information about the project.
46 | project = "batchglm"
47 | copyright = "2022, Mario Picciani"
48 | author = "Mario Picciani"
49 |
50 | # The version info for the project you're documenting, acts as replacement
51 | # for |version| and |release|, also used in various other places throughout
52 | # the built documents.
53 | #
54 | # The short X.Y version.
55 | version = "0.7.4"
56 | # The full version, including alpha/beta/rc tags.
57 | release = "0.7.4"
58 |
59 | # The language for content autogenerated by Sphinx. Refer to documentation
60 | # for a list of supported languages.
61 | #
62 | # This is also used if you do content translation via gettext catalogs.
63 | # Usually you set "language" from the command line for these cases.
64 | language = None
65 |
66 | # List of patterns, relative to source directory, that match files and
67 | # directories to ignore when looking for source files.
68 | # These patterns also affect html_static_path and html_extra_path.
69 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
70 |
71 | # The name of the Pygments (syntax highlighting) style to use.
72 | pygments_style = "sphinx"
73 |
74 | # If true, `todo` and `todoList` produce output, else they produce nothing.
75 | todo_include_todos = False
76 |
77 |
78 | # -- Options for HTML output -------------------------------------------
79 |
80 | # The theme to use for HTML and HTML Help pages. See the documentation for
81 | # a list of builtin themes.
82 | #
83 | html_theme = "sphinx_rtd_theme"
84 |
85 | # Theme options are theme-specific and customize the look and feel of a
86 | # theme further. For a list of options available for each theme, see the
87 | # documentation.
88 | #
89 | # html_theme_options = {}
90 |
91 | # Add any paths that contain custom static files (such as style sheets) here,
92 | # relative to this directory. They are copied after the builtin static files,
93 | # so a file named "default.css" will overwrite the builtin "default.css".
94 | html_static_path = ["_static"]
95 |
96 |
97 | # -- Options for HTMLHelp output ---------------------------------------
98 |
99 | # Output file base name for HTML help builder.
100 | htmlhelp_basename = "batchglmdoc"
101 |
102 |
103 | # -- Options for LaTeX output ------------------------------------------
104 |
105 | latex_elements = {
106 | # The paper size ("letterpaper" or "a4paper").
107 | #
108 | # "papersize": "letterpaper",
109 | # The font size ("10pt", "11pt" or "12pt").
110 | #
111 | # "pointsize": "10pt",
112 | # Additional stuff for the LaTeX preamble.
113 | #
114 | # "preamble": "",
115 | # Latex figure (float) alignment
116 | #
117 | # "figure_align": "htbp",
118 | }
119 |
120 | # Grouping the document tree into LaTeX files. List of tuples
121 | # (source start file, target name, title, author, documentclass
122 | # [howto, manual, or own class]).
123 | latex_documents = [
124 | (
125 | master_doc,
126 | "batchglm.tex",
127 | "batchglm Documentation",
128 | "Mario Picciani",
129 | "manual",
130 | ),
131 | ]
132 |
133 |
134 | # -- Options for manual page output ------------------------------------
135 |
136 | # One entry per manual page. List of tuples
137 | # (source start file, name, description, authors, manual section).
138 | man_pages = [
139 | (
140 | master_doc,
141 | "batchglm",
142 | "batchglm Documentation",
143 | [author],
144 | 1,
145 | )
146 | ]
147 |
148 | autodoc_typehints = "description"
149 |
150 |
151 | # -- Options for Texinfo output ----------------------------------------
152 |
153 | # Grouping the document tree into Texinfo files.
List of tuples 154 | # (source start file, target name, title, author, 155 | # dir menu entry, description, category) 156 | texinfo_documents = [ 157 | ( 158 | master_doc, 159 | "batchglm", 160 | "batchglm Documentation", 161 | author, 162 | "batchglm", 163 | "One line description of project.", 164 | "Miscellaneous", 165 | ), 166 | ] 167 | 168 | html_css_files = [ 169 | "custom_cookietemple.css", 170 | ] 171 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributor Guide 2 | ================= 3 | 4 | Thank you for your interest in improving this project. 5 | This project is open-source under the `BSD license`_ and 6 | highly welcomes contributions in the form of bug reports, feature requests, and pull requests. 7 | 8 | Here is a list of important resources for contributors: 9 | 10 | - `Source Code`_ 11 | - `Documentation`_ 12 | - `Issue Tracker`_ 13 | - `Code of Conduct`_ 14 | 15 | .. _BSD license: https://opensource.org/licenses/BSD 16 | .. _Source Code: https://github.com/theislab/batchglm 17 | .. _Documentation: https://batchglm.readthedocs.io/ 18 | .. _Issue Tracker: https://github.com/theislab/batchglm/issues 19 | 20 | How to report a bug 21 | ------------------- 22 | 23 | Report bugs on the `Issue Tracker`_. 24 | 25 | 26 | How to request a feature 27 | ------------------------ 28 | 29 | Request features on the `Issue Tracker`_. 30 | 31 | 32 | How to set up your development environment 33 | ------------------------------------------ 34 | 35 | You need Python 3.7+ and the following tools: 36 | 37 | - Poetry_ 38 | - Nox_ 39 | - nox-poetry_ 40 | 41 | You can install them with: 42 | 43 | .. code:: console 44 | 45 | $ pip install poetry nox nox-poetry 46 | 47 | Install the package with development requirements: 48 | 49 | .. code:: console 50 | 51 | $ make install 52 | 53 | You can now run an interactive Python session, 54 | or the command-line interface: 55 | 56 | .. code:: console 57 | 58 | $ poetry run python 59 | $ poetry run batchglm 60 | 61 | .. _Poetry: https://python-poetry.org/ 62 | .. _Nox: https://nox.thea.codes/ 63 | .. _nox-poetry: https://nox-poetry.readthedocs.io/ 64 | 65 | 66 | How to test the project 67 | ----------------------- 68 | 69 | Run the full test suite: 70 | 71 | .. code:: console 72 | 73 | $ nox 74 | 75 | List the available Nox sessions: 76 | 77 | .. code:: console 78 | 79 | $ nox --list-sessions 80 | 81 | You can also run a specific Nox session. 82 | For example, invoke the unit test suite like this: 83 | 84 | .. code:: console 85 | 86 | $ nox --session=tests 87 | 88 | Unit tests are located in the ``tests`` directory, 89 | and are written using the pytest_ testing framework. 90 | 91 | .. _pytest: https://pytest.readthedocs.io/ 92 | 93 | 94 | How to submit changes 95 | --------------------- 96 | 97 | Open a `pull request`_ to submit changes to this project against the ``development`` branch. 98 | 99 | Your pull request needs to meet the following guidelines for acceptance: 100 | 101 | - The Nox test suite must pass without errors and warnings. 102 | - Include unit tests. This project maintains a high code coverage. 103 | - If your changes add functionality, update the documentation accordingly. 104 | 105 | To run linting and code formatting checks before committing your change, you can install pre-commit as a Git hook by running the following command: 106 | 107 | .. 
code:: console
108 |
109 | $ nox --session=pre-commit -- install
110 |
111 | It is recommended to open an issue before starting work on anything.
112 | This will allow a chance to talk it over with the owners and validate your approach.
113 |
114 | .. _pull request: https://github.com/theislab/batchglm/pulls
115 | .. _Code of Conduct: CODE_OF_CONDUCT.rst
116 |
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 | ====================================
4 | Welcome to batchglm's documentation!
5 | ====================================
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 | :caption: Contents:
10 |
11 | readme
12 | installation
13 | api/index
14 | contributing
15 | authors
16 | code_of_conduct
17 |
18 | Indices and tables
19 | ==================
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 |
-------------------------------------------------------------------------------- /docs/installation.rst: --------------------------------------------------------------------------------
1 | .. highlight:: shell
2 |
3 | ============
4 | Installation
5 | ============
6 |
7 |
8 | Stable release
9 | --------------
10 |
11 | To install batchglm, run this command in your terminal:
12 |
13 | .. code-block:: console
14 |
15 | $ pip install batchglm
16 |
17 | This is the preferred method to install batchglm, as it will always install the most recent stable release.
18 |
19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide
20 | you through the process.
21 |
22 | .. _pip: https://pip.pypa.io
23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
24 |
25 |
26 | From sources
27 | ------------
28 |
29 | The sources for batchglm can be downloaded from the `Github repo`_.
30 | Please note that you require `poetry`_ to be installed.
31 |
32 | You can either clone the public repository:
33 |
34 | .. code-block:: console
35 |
36 | $ git clone git://github.com/theislab/batchglm
37 |
38 | Or download the `tarball`_:
39 |
40 | .. code-block:: console
41 |
42 | $ curl -OJL https://github.com/theislab/batchglm/tarball/master
43 |
44 | Once you have a copy of the source, you can install it with::
45 |
46 |
47 |     $ make install
48 |
49 |
50 | .. _Github repo: https://github.com/theislab/batchglm
51 | .. _tarball: https://github.com/theislab/batchglm/tarball/master
52 | .. _poetry: https://python-poetry.org/
53 |
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=python -msphinx
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=batchglm
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed,
20 | echo.then set the SPHINXBUILD environment variable to point to the full
21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the
22 | echo.Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. contents:: 5 | :local: 6 | :backlinks: none 7 | 8 | 9 | .. automodule:: batchglm.__main__ 10 | :members: -------------------------------------------------------------------------------- /docs/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx>=4.0.1 2 | sphinx_rtd_theme>=0.5.2 3 | sphinx-rtd-dark-mode>=1.2.1 4 | sphinx-automodapi>=0.13 5 | sphinx_click>=3.0.0 6 | click>=8.0.1 7 | -------------------------------------------------------------------------------- /docs/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | 5 | 6 | Fitting distributions 7 | --------------------- 8 | 9 | 10 | How to fit a `GLM `__. 11 | 12 | 13 | 14 | Other 15 | ----- 16 | 17 | Linear regression example using Tensorflow `GLM `__. 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. click:: batchglm.__main__:main 5 | :prog: batchglm 6 | :nested: full -------------------------------------------------------------------------------- /makefiles/Linux.mk: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | from urllib.request import pathname2url 8 | 9 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 10 | endef 11 | export BROWSER_PYSCRIPT 12 | 13 | define PRINT_HELP_PYSCRIPT 14 | import re, sys 15 | 16 | for line in sys.stdin: 17 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 18 | if match: 19 | target, help = match.groups() 20 | print("%-20s %s" % (target, help)) 21 | endef 22 | export PRINT_HELP_PYSCRIPT 23 | 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | clean-build: ## remove build artifacts 32 | rm -fr build/ 33 | rm -fr dist/ 34 | rm -fr .eggs/ 35 | find . -name '*.egg-info' -exec rm -fr {} + 36 | find . -name '*.egg' -exec rm -f {} + 37 | 38 | clean-pyc: ## remove Python file artifacts 39 | find . -name '*.pyc' -exec rm -f {} + 40 | find . -name '*.pyo' -exec rm -f {} + 41 | find . -name '*~' -exec rm -f {} + 42 | find . 
-name '__pycache__' -exec rm -fr {} +
43 |
44 | clean-test: ## remove test and coverage artifacts
45 | rm -fr .tox/
46 | rm -f .coverage
47 | rm -fr htmlcov/
48 | rm -fr .pytest_cache
49 |
50 | lint: ## check style with flake8
51 | flake8 batchglm tests
52 |
53 | test: ## run tests quickly with the default Python
54 | pytest
55 |
56 | test-all: ## run tests on every Python version with nox
57 | nox
58 |
59 | coverage: ## check code coverage quickly with the default Python
60 | coverage run --source batchglm -m pytest
61 | coverage report -m
62 | coverage html
63 | $(BROWSER) htmlcov/index.html
64 |
65 | docs: ## generate Sphinx HTML documentation, including API docs
66 | rm -f docs/batchglm.rst
67 | rm -f docs/modules.rst
68 | sphinx-apidoc -o docs/ batchglm
69 | $(MAKE) -C docs clean
70 | $(MAKE) -C docs html
71 | $(BROWSER) docs/_build/html/index.html
72 |
73 | servedocs: docs ## compile the docs watching for changes
74 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
75 |
76 | release: dist ## package and upload a release
77 | poetry publish
78 |
79 | dist: clean-build clean-pyc ## builds source and wheel package
80 | poetry build
81 |
82 | install: clean-build clean-pyc ## install the package to the active Python's site-packages
83 | poetry install
84 |
-------------------------------------------------------------------------------- /makefiles/Windows.mk: --------------------------------------------------------------------------------
1 | .PHONY: clean clean-test clean-pyc clean-build docs help
2 | .DEFAULT_GOAL := help
3 |
4 | define BROWSER_PYSCRIPT
5 | import os, webbrowser, sys
6 |
7 | from urllib.request import pathname2url
8 |
9 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
10 | endef
11 | export BROWSER_PYSCRIPT
12 |
13 | define PRINT_HELP_PYSCRIPT
14 | import re, sys
15 |
16 | for line in sys.stdin:
17 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
18 | if match:
19 | target, help = match.groups()
20 | print("%-20s %s" % (target, help))
21 | endef
22 | export PRINT_HELP_PYSCRIPT
23 |
24 | BROWSER := python -c "$$BROWSER_PYSCRIPT"
25 |
26 | help:
27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
28 |
29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
30 |
31 | clean-build: ## remove build artifacts
32 | if exist build rd /s /q build
33 | if exist dist rd /s /q dist
34 | if exist .eggs rd /s /q .eggs
35 | for /d /r . %%d in (*.egg-info) do @if exist "%%d" echo "%%d" && rd /s/q "%%d"
36 | del /q /s /f .\*.egg
37 |
38 |
39 | clean-pyc: ## remove Python file artifacts
40 | del /s /f /q .\*.pyc
41 | del /s /f /q .\*.pyo
42 | del /s /f /q .\*~
43 | for /d /r .
%%d in (*__pycache__) do @if exist "%%d" echo "%%d" && rd /s/q "%%d" 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | if exist .tox rd /s /q .tox 47 | if exist .coverage del /q .coverage 48 | if exist htmlcov rd /s /q htmlcov 49 | if exist .pytest_cache rd /s /q .pytest_cache 50 | 51 | lint: ## check style with flake8 52 | flake8 batchglm tests 53 | 54 | test: ## run tests quickly with the default Python 55 | pytest 56 | 57 | test-all: ## run tests on every Python version with nox 58 | nox 59 | 60 | coverage: ## check code coverage quickly with the default Python 61 | coverage run --source batchglm -m pytest 62 | coverage report -m 63 | coverage html 64 | $(BROWSER) htmlcov\index.html 65 | 66 | docs: ## generate Sphinx HTML documentation, including API docs 67 | del /f /q docs\batchglm.rst 68 | del /f /q docs\modules.rst 69 | sphinx-apidoc -o docs batchglm 70 | $(MAKE) -C docs clean 71 | $(MAKE) -C docs html 72 | $(BROWSER) docs\_build\html\index.html 73 | 74 | servedocs: docs ## compile the docs watching for changes 75 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 76 | 77 | release: dist ## package and upload a release 78 | poetry publish 79 | 80 | dist: clean-build clean-pyc ## builds source and wheel package 81 | poetry build 82 | 83 | install: clean-build clean-pyc ## install the package to the active Python's site-packages 84 | poetry install 85 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """Nox sessions.""" 2 | import shutil 3 | import sys 4 | from pathlib import Path 5 | from textwrap import dedent 6 | 7 | import nox 8 | from rich import print 9 | 10 | try: 11 | from nox_poetry import Session, session 12 | except ImportError: 13 | print("[bold red]Did not find nox-poetry installed in your current environment!") 14 | print("[bold blue]Try installing it using [bold green]pip install nox-poetry [bold blue]! ") 15 | sys.exit(1) 16 | 17 | package = "batchglm" 18 | python_versions = ["3.8", "3.9"] 19 | nox.options.sessions = ( 20 | "pre-commit", 21 | "safety", 22 | "mypy", 23 | "tests", 24 | "xdoctest", 25 | "docs-build", 26 | ) 27 | 28 | 29 | def activate_virtualenv_in_precommit_hooks(session: Session) -> None: 30 | """Activate virtualenv in hooks installed by pre-commit. 31 | 32 | This function patches git hooks installed by pre-commit to activate the 33 | session's virtual environment. This allows pre-commit to locate hooks in 34 | that environment when invoked from git. 35 | 36 | Args: 37 | session: The Session object. 
38 | """ 39 | if session.bin is None: 40 | return 41 | 42 | virtualenv = session.env.get("VIRTUAL_ENV") 43 | if virtualenv is None: 44 | return 45 | 46 | hookdir = Path(".git") / "hooks" 47 | if not hookdir.is_dir(): 48 | return 49 | 50 | for hook in hookdir.iterdir(): 51 | if hook.name.endswith(".sample") or not hook.is_file(): 52 | continue 53 | 54 | text = hook.read_text() 55 | bindir = repr(session.bin)[1:-1] # strip quotes 56 | if not (Path("A") == Path("a") and bindir.lower() in text.lower() or bindir in text): 57 | continue 58 | 59 | lines = text.splitlines() 60 | if not (lines[0].startswith("#!") and "python" in lines[0].lower()): 61 | continue 62 | 63 | header = dedent( 64 | f"""\ 65 | import os 66 | os.environ["VIRTUAL_ENV"] = {virtualenv!r} 67 | os.environ["PATH"] = os.pathsep.join(( 68 | {session.bin!r}, 69 | os.environ.get("PATH", ""), 70 | )) 71 | """ 72 | ) 73 | 74 | lines.insert(1, header) 75 | hook.write_text("\n".join(lines)) 76 | 77 | 78 | @session(name="pre-commit", python=python_versions) 79 | def precommit(session: Session) -> None: 80 | """Lint using pre-commit.""" 81 | args = session.posargs or ["run", "--all-files"] 82 | session.install( 83 | "black", 84 | "darglint", 85 | "flake8", 86 | "flake8-bandit", 87 | "flake8-bugbear", 88 | "flake8-docstrings", 89 | "flake8-rst-docstrings", 90 | "pep8-naming", 91 | "pre-commit", 92 | "pre-commit-hooks", 93 | "reorder-python-imports", 94 | ) 95 | session.run("pre-commit", *args) 96 | if args and args[0] == "install": 97 | activate_virtualenv_in_precommit_hooks(session) 98 | 99 | 100 | @session(python=python_versions) 101 | def safety(session: Session) -> None: 102 | """Scan dependencies for insecure packages.""" 103 | requirements = session.poetry.export_requirements() 104 | session.install("safety") 105 | session.run("safety", "check", "--full-report", f"--file={requirements}") 106 | 107 | 108 | @session(python=python_versions) 109 | def mypy(session: Session) -> None: 110 | """Type-check using mypy.""" 111 | args = session.posargs or ["batchglm", "tests", "docs/conf.py"] 112 | session.install(".") 113 | session.install("mypy", "pytest", "types-pkg-resources", "types-requests", "types-attrs") 114 | session.run("mypy", *args) 115 | 116 | 117 | @session(python=python_versions) 118 | def tests(session: Session) -> None: 119 | """Run the test suite.""" 120 | session.install(".") 121 | session.install("coverage[toml]", "pytest", "pygments") 122 | try: 123 | session.run("coverage", "run", "--parallel", "-m", "pytest", *session.posargs) 124 | finally: 125 | if session.interactive: 126 | session.notify("coverage") 127 | 128 | 129 | @session 130 | def coverage(session: Session) -> None: 131 | """Produce the coverage report.""" 132 | # Do not use session.posargs unless this is the only session. 
133 | nsessions = len(session._runner.manifest) # type: ignore[attr-defined] 134 | has_args = session.posargs and nsessions == 1 135 | args = session.posargs if has_args else ["report", "-i"] 136 | 137 | session.install("coverage[toml]") 138 | 139 | if not has_args and any(Path().glob(".coverage.*")): 140 | session.run("coverage", "combine") 141 | 142 | session.run("coverage", *args) 143 | 144 | 145 | @session(python=python_versions) 146 | def typeguard(session: Session) -> None: 147 | """Runtime type checking using Typeguard.""" 148 | session.install(".") 149 | session.install("pytest", "typeguard", "pygments") 150 | session.run("pytest", f"--typeguard-packages={package}", *session.posargs) 151 | 152 | 153 | @session(python=python_versions) 154 | def xdoctest(session: Session) -> None: 155 | """Run examples with xdoctest.""" 156 | args = session.posargs or ["all"] 157 | session.install(".") 158 | session.install("xdoctest[colors]") 159 | session.run("python", "-m", "xdoctest", package, *args) 160 | 161 | 162 | @session(name="docs-build", python=python_versions) 163 | def docs_build(session: Session) -> None: 164 | """Build the documentation.""" 165 | args = session.posargs or ["docs", "docs/_build"] 166 | session.install(".") 167 | session.install("sphinx", "sphinx-click", "sphinx-rtd-theme", "sphinx-rtd-dark-mode") 168 | 169 | build_dir = Path("docs", "_build") 170 | if build_dir.exists(): 171 | shutil.rmtree(build_dir) 172 | 173 | session.run("sphinx-build", *args) 174 | 175 | 176 | @session(python=python_versions) 177 | def docs(session: Session) -> None: 178 | """Build and serve the documentation with live reloading on file changes.""" 179 | args = session.posargs or ["--open-browser", "docs", "docs/_build"] 180 | session.install(".") 181 | session.install("sphinx", "sphinx-autobuild", "sphinx-click", "sphinx-rtd-theme", "sphinx-rtd-dark-mode") 182 | 183 | build_dir = Path("docs", "_build") 184 | if build_dir.exists(): 185 | shutil.rmtree(build_dir) 186 | 187 | session.run("sphinx-autobuild", *args) 188 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "batchglm" 3 | version = "0.7.4" # <> 4 | description = "Fast and scalable fitting of over-determined generalized-linear models (GLMs)" 
5 | authors = ["Mario Picciani "] 6 | license = "BSD" 7 | readme = "README.rst" 8 | homepage = "https://github.com/theislab/batchglm" 9 | repository = "https://github.com/theislab/batchglm" 10 | documentation = "https://batchglm.readthedocs.io" 11 | packages = [ 12 | { include = "batchglm" }, 13 | ] 14 | classifiers = [ 15 | "Programming Language :: Python :: 3.6", 16 | "Programming Language :: Python :: 3.7", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | ] 20 | 21 | 22 | [tool.poetry.dependencies] 23 | python = ">=3.8.0, <3.10.0" 24 | click = "^8.0.0" 25 | rich = "^10.3.0" 26 | PyYAML = "^5.4.1" 27 | dask = "2021.4.1" # https://github.com/theislab/diffxpy/issues/194 28 | numpy = ">=1.22.2" 29 | patsy = "^0.5.2" 30 | scipy = "^1.7.3" 31 | pandas = "^1.4.0" 32 | anndata = "^0.7.8" 33 | sparse = "0.9.1" # https://github.com/theislab/diffxpy/issues/194 34 | matplotlib = "^3.5.1" 35 | sphinx-autodoc-typehints = "^1.16.0" 36 | seaborn = "^0.11.2" 37 | bandit = "1.7.2" 38 | 39 | [tool.poetry.dev-dependencies] 40 | pytest = "^6.2.3" 41 | coverage = {extras = ["toml"], version = "^5.3"} 42 | safety = "^1.9.0" 43 | typeguard = "^2.12.0" 44 | xdoctest = {extras = ["colors"], version = "^0.15.0"} 45 | sphinx = "^4.0.2" 46 | sphinx-autobuild = "^2021.3.14" 47 | pre-commit = "^2.11.1" 48 | flake8 = "^3.8.4" 49 | black = ">=21.12b0" 50 | flake8-bandit = "^2.1.2" 51 | flake8-bugbear = "^21.4.3" 52 | flake8-docstrings = "^1.5.0" 53 | flake8-rst-docstrings = "^0.2.3" 54 | pep8-naming = "^0.11.1" 55 | darglint = "^1.5.8" 56 | reorder-python-imports = "^2.5.0" 57 | pre-commit-hooks = "^4.0.1" 58 | sphinx-rtd-theme = "^0.5.0" 59 | sphinx-click = "^3.0.0" 60 | Pygments = "^2.8.1" 61 | types-pkg-resources = "^0.1.2" 62 | types-requests = "^2.25.2" 63 | types-attrs = "^19.1.0" 64 | sphinx-rtd-dark-mode = "^1.2.3" 65 | Jinja2 = "^3.0.1" 66 | mypy = "^0.910" 67 | matplotlib = "^3.5.1" 68 | nox = "^2022.1.7" 69 | cookietemple = "^1.3.11" 70 | nox-poetry = "^0.9.0" 71 | 72 | [tool.poetry.scripts] 73 | batchglm = "batchglm.__main__:main" 74 | 75 | [tool.black] 76 | line-length = 120 77 | 78 | [tool.mypy] 79 | strict = false 80 | pretty = true 81 | show_column_numbers = true 82 | show_error_codes = true 83 | show_error_context = true 84 | ignore_missing_imports = true 85 | exclude = "_version.py" 86 | 87 | [tool.isort] 88 | multi_line_output=3 89 | include_trailing_comma=true 90 | balanced_wrapping=true 91 | line_length=120 92 | 93 | [tool.coverage.paths] 94 | source = ["batchglm", "*/site-packages"] 95 | 96 | [tool.coverage.run] 97 | branch = true 98 | source = ["batchglm"] 99 | 100 | [tool.coverage.report] 101 | show_missing = true 102 | 103 | [build-system] 104 | requires = ["poetry-core>=1.0.0"] 105 | build-backend = "poetry.core.masonry.api" 106 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anndata==0.7.8 2 | dask==2021.3.0 3 | numpy>=1.16.4 4 | pandas==1.1.5 5 | patsy==0.5.2 6 | pytest==6.2.5 7 | scipy>=1.2.1 8 | sparse==0.9.1 9 | toolz==0.11.2 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = batchglm/_version.py 5 | versionfile_build = batchglm/_version.py 6 | tag_prefix = 7 | 8 | [build_ext] 9 | inplace = 1 10 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | import versioneer 4 | 5 | author = "David S. Fischer, Florian R. Hölzlwimmer, Sabrina Richter" 6 | author_email = "david.fischer@helmholtz-muenchen.de" 7 | description = "Fast and scalable fitting of over-determined generalized-linear models (GLMs)" 8 | 9 | with open("README.md", "r") as fh: 10 | long_description = fh.read() 11 | 12 | setup( 13 | name="batchglm", 14 | author=author, 15 | author_email=author_email, 16 | description=description, 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | packages=find_packages(), 20 | install_requires=["anndata", "numpy>=1.16.4", "scipy>=1.2.1", "pandas", "dask", "toolz", "patsy", "sparse"], 21 | extras_require={ 22 | "plotting_deps": ["matplotlib", "seaborn"], 23 | "docs": [ 24 | "sphinx", 25 | "sphinx-autodoc-typehints", 26 | "sphinx_rtd_theme", 27 | "jinja2", 28 | "docutils", 29 | ], 30 | }, 31 | version=versioneer.get_version(), 32 | cmdclass=versioneer.get_cmdclass(), 33 | ) 34 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test suite for the batchglm package.""" 2 | -------------------------------------------------------------------------------- /tests/numpy/test_accuracy.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | 4 | import numpy as np 5 | from utils import get_estimator, get_generated_model 6 | 7 | from batchglm import pkg_constants 8 | from batchglm.models.base_glm import ModelGLM 9 | from batchglm.train.numpy.base_glm import EstimatorGlm 10 | 11 | logger = logging.getLogger("batchglm") 12 | 13 | NB_OPTIMIZERS = ["GD", "ADAM", "ADAGRAD", "RMSPROP", "NR", "NR_TR", "IRLS", "IRLS_GD", "IRLS_TR", "IRLS_GD_TR"] 14 | NORM_OPTIMIZERS = ["GD", "ADAM", "ADAGRAD", "RMSPROP", "NR", "NR_TR", "IRLS", "IRLS_TR"] 15 | BETA_OPTIMIZERS = ["GD", "ADAM", "ADAGRAD", "RMSPROP", "NR", "NR_TR"] 16 | 17 | 18 | pkg_constants.TRUST_REGION_T1 = 0.5 19 | pkg_constants.TRUST_REGION_T2 = 1.5 20 | pkg_constants.CHOLESKY_LSTSQS = True 21 | pkg_constants.CHOLESKY_LSTSQS_BATCHED = True 22 | pkg_constants.JACOBIAN_MODE = "analytic" 23 | 24 | 25 | class TestAccuracy(unittest.TestCase): 26 | def eval_estimation(self, estimator: EstimatorGlm): 27 | mean_thres_location = 0.2 28 | mean_thres_scale = 0.2 29 | std_thres_location = 1 30 | std_thres_scale = 1 31 | 32 | def deviation_theta(true: np.ndarray, pred: np.ndarray, mean_thres: float, std_thres: float) -> bool: 33 | relative_deviation = (pred - true) / true 34 | mean = np.mean(relative_deviation) 35 | std = np.std(relative_deviation) 36 | logger.info(f"Relative deviation theta: {mean} (mean), {std} (std)") 37 | return np.abs(mean) <= mean_thres and std <= std_thres 38 | 39 | success = True 40 | if estimator.train_loc: 41 | success = deviation_theta( 42 | true=estimator.model_container.model._theta_location, 43 | pred=estimator.model_container.theta_location, 44 | mean_thres=mean_thres_location, 45 | std_thres=std_thres_location, 46 | ) 47 | if estimator.train_scale: 48 | success &= deviation_theta( 49 | true=estimator.model_container.model._theta_scale, 50 | pred=estimator.model_container.theta_scale, 51 | 
mean_thres=mean_thres_scale, 52 | std_thres=std_thres_scale, 53 | ) 54 | return success 55 | 56 | def _test_accuracy(self, estimator: EstimatorGlm) -> bool: 57 | """Runs the estimator to fit the model and evaluates with respect to the simulated parameters.""" 58 | estimator.initialize() 59 | estimator.train_sequence(training_strategy="DEFAULT") 60 | success = self.eval_estimation(estimator) 61 | if not success: 62 | logger.warning("Estimator did not yield exact results") 63 | return success 64 | 65 | 66 | class TestAccuracyNB(TestAccuracy): 67 | def test_accuracy_rand_theta(self): 68 | """ 69 | This tests randTheta simulated data with 2 conditions and 4 batches sparse and dense. 70 | """ 71 | dense_model = get_generated_model( 72 | noise_model="nb", num_conditions=2, num_batches=4, sparse=False, mode="randTheta" 73 | ) 74 | sparse_model = get_generated_model( 75 | noise_model="nb", num_conditions=2, num_batches=4, sparse=True, mode="randTheta" 76 | ) 77 | dense_estimator = get_estimator( 78 | noise_model="nb", model=dense_model, init_location="standard", init_scale="standard" 79 | ) 80 | assert self._test_accuracy(dense_estimator) 81 | 82 | sparse_estimator = get_estimator( 83 | noise_model="nb", model=sparse_model, init_location="standard", init_scale="standard" 84 | ) 85 | assert self._test_accuracy(sparse_estimator) 86 | 87 | def test_accuracy_const_theta(self): 88 | """ 89 | This tests constTheta simulated data with 2 conditions and 0 batches sparse and dense. 90 | """ 91 | dense_model = get_generated_model( 92 | noise_model="nb", num_conditions=2, num_batches=0, sparse=False, mode="constTheta" 93 | ) 94 | sparse_model = get_generated_model( 95 | noise_model="nb", num_conditions=2, num_batches=0, sparse=True, mode="constTheta" 96 | ) 97 | 98 | dense_estimator = get_estimator( 99 | noise_model="nb", model=dense_model, init_location="standard", init_scale="standard" 100 | ) 101 | assert self._test_accuracy(dense_estimator) 102 | 103 | sparse_estimator = get_estimator( 104 | noise_model="nb", model=sparse_model, init_location="standard", init_scale="standard" 105 | ) 106 | assert self._test_accuracy(sparse_estimator) 107 | 108 | 109 | class TestAccuracyPoisson(TestAccuracy): 110 | def test_accuracy_rand_theta(self): 111 | """ 112 | This tests randTheta simulated data with 2 conditions and 4 batches sparse and dense. 113 | """ 114 | dense_model = get_generated_model( 115 | noise_model="poisson", num_conditions=2, num_batches=4, sparse=False, mode="randTheta" 116 | ) 117 | sparse_model = get_generated_model( 118 | noise_model="poisson", num_conditions=2, num_batches=4, sparse=True, mode="randTheta" 119 | ) 120 | dense_estimator = get_estimator( 121 | noise_model="poisson", model=dense_model, init_location="standard", init_scale="standard" 122 | ) 123 | assert self._test_accuracy(dense_estimator) 124 | 125 | sparse_estimator = get_estimator( 126 | noise_model="poisson", model=sparse_model, init_location="standard", init_scale="standard" 127 | ) 128 | assert self._test_accuracy(sparse_estimator) 129 | 130 | def test_accuracy_const_theta(self): 131 | """ 132 | This tests constTheta simulated data with 2 conditions and 0 batches sparse and dense. 
133 | """ 134 | dense_model = get_generated_model( 135 | noise_model="poisson", num_conditions=2, num_batches=0, sparse=False, mode="constTheta" 136 | ) 137 | sparse_model = get_generated_model( 138 | noise_model="poisson", num_conditions=2, num_batches=0, sparse=True, mode="constTheta" 139 | ) 140 | 141 | dense_estimator = get_estimator( 142 | noise_model="poisson", model=dense_model, init_location="standard", init_scale="standard" 143 | ) 144 | assert self._test_accuracy(dense_estimator) 145 | 146 | sparse_estimator = get_estimator( 147 | noise_model="poisson", model=sparse_model, init_location="standard", init_scale="standard" 148 | ) 149 | assert self._test_accuracy(sparse_estimator) 150 | 151 | 152 | class TestAccuracyNorm(TestAccuracy): 153 | def test_accuracy_rand_theta(self): 154 | """ 155 | This tests randTheta simulated data with 2 conditions and 4 batches sparse and dense. 156 | """ 157 | dense_model = get_generated_model( 158 | noise_model="norm", num_conditions=2, num_batches=4, sparse=False, mode="randTheta" 159 | ) 160 | sparse_model = get_generated_model( 161 | noise_model="norm", num_conditions=2, num_batches=4, sparse=True, mode="randTheta" 162 | ) 163 | dense_estimator = get_estimator(noise_model="norm", model=dense_model) 164 | assert self._test_accuracy(dense_estimator) 165 | 166 | sparse_estimator = get_estimator(noise_model="norm", model=sparse_model) 167 | assert self._test_accuracy(sparse_estimator) 168 | 169 | def test_accuracy_const_theta(self): 170 | """ 171 | This tests constTheta simulated data with 2 conditions and 0 batches sparse and dense. 172 | """ 173 | dense_model = get_generated_model( 174 | noise_model="norm", num_conditions=2, num_batches=0, sparse=False, mode="constTheta" 175 | ) 176 | sparse_model = get_generated_model( 177 | noise_model="norm", num_conditions=2, num_batches=0, sparse=True, mode="constTheta" 178 | ) 179 | 180 | dense_estimator = get_estimator(noise_model="norm", model=dense_model) 181 | assert self._test_accuracy(dense_estimator) 182 | 183 | sparse_estimator = get_estimator(noise_model="norm", model=sparse_model) 184 | assert self._test_accuracy(sparse_estimator) 185 | 186 | 187 | if __name__ == "__main__": 188 | unittest.main() 189 | -------------------------------------------------------------------------------- /tests/numpy/test_accuracy_extreme_values.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | from test_accuracy import TestAccuracy 7 | from utils import get_estimator, get_generated_model 8 | 9 | logger = logging.getLogger("batchglm") 10 | # logging.getLogger("batchglm").setLevel(logging.WARNING) 11 | 12 | 13 | class _TestAccuracyXtremeAll(TestAccuracy): 14 | """ 15 | Test whether numerical extremes throw error in initialisation or during first training steps. 
16 | """ 17 | 18 | def _test_accuracy_extreme_values(self, idx: Union[List[int], int, np.ndarray], val: float, noise_model: str): 19 | model = get_generated_model(noise_model=noise_model, num_conditions=2, num_batches=4, sparse=False, mode=None) 20 | model._x[:, idx] = val 21 | estimator = get_estimator(noise_model=noise_model, model=model, init_location="standard", init_scale="standard") 22 | return self._test_accuracy(estimator) 23 | 24 | def _test_low_values(self, **kwargs): 25 | return self._test_accuracy_extreme_values(idx=0, val=0.0, **kwargs) 26 | 27 | def _test_zero_variance(self, **kwargs): 28 | self._modify_sim(idx=0, val=5.0, **kwargs) 29 | return self.basic_test(batched=False, train_loc=True, train_scale=True, sparse=False) 30 | 31 | 32 | class TestAccuracyXtremeNb(_TestAccuracyXtremeAll): 33 | """ 34 | Test whether optimizers yield exact results for negative binomial distributed data. 35 | """ 36 | 37 | def test_nb(self) -> bool: 38 | np.random.seed(1) 39 | ret_val = self._test_low_values(noise_model="nb") 40 | np.random.seed(1) 41 | return ret_val and self._test_zero_variance(noise_model="nb") 42 | 43 | 44 | class TestAccuracyXtremeNorm(_TestAccuracyXtremeAll): 45 | """ 46 | Test whether optimizers yield exact results for normal distributed data. 47 | """ 48 | 49 | def test_norm(self) -> bool: 50 | logger.error("TestAccuracyXtremeNorm.test_norm()") 51 | logger.info("Normal noise model not implemented for numpy") 52 | 53 | np.random.seed(1) 54 | ret_val = self._test_low_values(noise_model="norm") 55 | np.random.seed(1) 56 | return ret_val and self._test_zero_variance(noise_model="nb") 57 | 58 | 59 | class TestAccuracyXtremeBeta(_TestAccuracyXtremeAll): 60 | """ 61 | Test whether optimizers yield exact results for beta distributed data. 62 | """ 63 | 64 | def test_beta(self) -> bool: 65 | logger.error("TestAccuracyXtremeBeta.test_beta()") 66 | logger.info("Beta noise model not implemented for numpy") 67 | 68 | # np.random.seed(1) 69 | # self._test_low_values(noise_model="beta") 70 | # self._test_zero_variance(noise_model="beta") 71 | return True 72 | 73 | 74 | class TestAccuracyXtremePoisson(_TestAccuracyXtremeAll): 75 | """ 76 | Test whether optimizers yield exact results for Poisson distributed data. 
77 | """ 78 | 79 | def test_poisson(self) -> bool: 80 | logger.error("TestAccuracyXtremePoisson.test_poisson()") 81 | logger.info("Poisson noise model not implemented for numpy") 82 | 83 | np.random.seed(1) 84 | self._test_low_values(noise_model="poisson") 85 | # self._test_zero_variance(noise_model="poisson") 86 | return True 87 | 88 | 89 | if __name__ == "__main__": 90 | unittest.main() 91 | -------------------------------------------------------------------------------- /tests/numpy/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | 5 | from batchglm.models.base_glm import ModelGLM 6 | from batchglm.models.glm_beta import Model as BetaModel 7 | from batchglm.models.glm_nb import Model as NBModel 8 | from batchglm.models.glm_norm import Model as NormModel 9 | from batchglm.models.glm_poisson import Model as PoissonModel 10 | from batchglm.train.numpy.base_glm import EstimatorGlm 11 | from batchglm.train.numpy.glm_nb import Estimator as NBEstimator 12 | from batchglm.train.numpy.glm_norm import Estimator as NormEstimator 13 | from batchglm.train.numpy.glm_poisson import Estimator as PoissonEstimator 14 | 15 | 16 | def get_estimator(noise_model: str, **kwargs) -> EstimatorGlm: 17 | if noise_model == "nb": 18 | return NBEstimator(**kwargs) 19 | elif noise_model == "norm": 20 | return NormEstimator(**kwargs) 21 | # estimator = NormEstimator(**kwargs) 22 | elif noise_model == "beta": 23 | raise NotImplementedError("Beta Estimator is not yet implemented.") 24 | # estimator = BetaEstimator(**kwargs) 25 | elif noise_model == "poisson": 26 | return PoissonEstimator(**kwargs) 27 | raise ValueError(f"Noise model {noise_model} not recognized.") 28 | 29 | 30 | def get_model(noise_model: str) -> ModelGLM: 31 | if noise_model is None: 32 | raise ValueError("noise_model is None") 33 | if noise_model == "nb": 34 | return NBModel() 35 | elif noise_model == "norm": 36 | return NormModel() 37 | elif noise_model == "beta": 38 | return BetaModel() 39 | elif noise_model == "poisson": 40 | return PoissonModel() 41 | raise ValueError(f"Noise model {noise_model} not recognized.") 42 | 43 | 44 | def get_generated_model( 45 | noise_model: str, num_conditions: int, num_batches: int, sparse: bool, mode: Optional[str] = None 46 | ) -> ModelGLM: 47 | model = get_model(noise_model=noise_model) 48 | 49 | def random_uniform(low: float, high: float): 50 | return lambda shape: np.random.uniform(low=low, high=high, size=shape) 51 | 52 | def const(offset: float): 53 | return lambda shape: np.zeros(shape) + offset 54 | 55 | if mode is None: 56 | """Sample loc and scale with default functions""" 57 | rand_fn_ave = None 58 | rand_fn_loc = None 59 | rand_fn_scale = None 60 | 61 | elif mode == "randTheta": 62 | 63 | if noise_model in ["nb", "norm", "poisson"]: 64 | # too large mean breaks poisson 65 | rand_fn_ave = random_uniform(10, 1000 if noise_model != "poisson" else 15) 66 | rand_fn_loc = random_uniform(1, 3) 67 | rand_fn_scale = random_uniform(1, 3) 68 | elif noise_model == "beta": 69 | rand_fn_ave = random_uniform(0.1, 0.7) 70 | rand_fn_loc = random_uniform(0.0, 0.15) 71 | rand_fn_scale = random_uniform(0.0, 0.15) 72 | else: 73 | raise ValueError(f"Noise model {noise_model} not recognized.") 74 | 75 | elif mode == "constTheta": 76 | 77 | if noise_model in ["nb", "norm", "poisson"]: 78 | # too large mean breaks poisson 79 | rand_fn_ave = random_uniform(10, 1000 if noise_model != "poisson" else 15) 80 | rand_fn_loc = 
const(1.0) 81 | rand_fn_scale = const(1.0) 82 | elif noise_model == "beta": 83 | rand_fn_ave = random_uniform(0.1, 0.9) 84 | rand_fn_loc = const(0.05) 85 | rand_fn_scale = const(0.2) 86 | else: 87 | raise ValueError(f"Noise model {noise_model} not recognized.") 88 | 89 | else: 90 | raise ValueError(f"Mode {mode} not recognized.") 91 | 92 | model.generate_artificial_data( 93 | n_obs=2000, 94 | n_vars=100, 95 | num_conditions=num_conditions, 96 | num_batches=num_batches, 97 | intercept_scale=True, 98 | sparse=sparse, 99 | rand_fn_ave=rand_fn_ave, 100 | rand_fn_loc=rand_fn_loc, 101 | rand_fn_scale=rand_fn_scale, 102 | ) 103 | return model 104 | -------------------------------------------------------------------------------- /tests/run_data_utils_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from batchglm.utils.data import constraint_system_from_star 8 | 9 | logger = logging.getLogger("batchglm") 10 | 11 | 12 | class TestConstraintSystemFromStar(unittest.TestCase): 13 | 14 | true_cmat = np.array( 15 | [ 16 | [1.0, 0.0, 0.0, 0.0], 17 | [0.0, 1.0, 0.0, 0.0], 18 | [0.0, 0.0, -1.0, 0.0], 19 | [0.0, 0.0, 1.0, 0.0], 20 | [0.0, 0.0, 0.0, -1.0], 21 | [0.0, 0.0, 0.0, 1.0], 22 | ] 23 | ) 24 | 25 | true_cmat_list = np.array( 26 | [[-1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]] 27 | ) 28 | 29 | true_cmat_array = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) 30 | 31 | true_dmat = np.array( 32 | [ 33 | [1.0, 0.0, 0.0, 1.0, 0.0, 0.0], 34 | [0.0, 1.0, 0.0, 0.0, 1.0, 0.0], 35 | [0.0, 1.0, 0.0, 0.0, 1.0, 0.0], 36 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0], 37 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0], 38 | [0.0, 1.0, 0.0, 0.0, 0.0, 1.0], 39 | ] 40 | ) 41 | 42 | true_dmat_list = np.array( 43 | [ 44 | [1.0, 0.0, 1.0, 0.0, 0.0], 45 | [0.0, 1.0, 0.0, 1.0, 0.0], 46 | [0.0, 1.0, 0.0, 1.0, 0.0], 47 | [1.0, 0.0, 0.0, 0.0, 0.0], 48 | [1.0, 0.0, 0.0, 0.0, 0.0], 49 | [0.0, 1.0, 0.0, 0.0, 1.0], 50 | ] 51 | ) 52 | 53 | true_dmat_array = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0]]) 54 | 55 | true_terms = ["condition", "batch"] 56 | true_coefs = ["condition[0]", "condition[1]", "batch[0]", "batch[1]", "batch[2]", "batch[3]"] 57 | 58 | true_terms_list = ["condition[0]", "condition[1]", "batch[T.1]", "batch[T.2]", "batch[T.3]"] 59 | true_coefs_list = ["condition[0]", "condition[1]", "batch[T.1]", "batch[T.2]", "batch[T.3]"] 60 | 61 | true_terms_array = ["condition[0]", "condition[1]", "batch[T.1]"] 62 | true_coefs_array = ["condition[0]", "condition[1]", "batch[T.1]"] 63 | 64 | # dict tests 65 | 66 | def execute_test_dict(self, *args, **kwargs): 67 | dmat, coef_names, cmat, term_names = constraint_system_from_star(*args, **kwargs) 68 | assert term_names == self.true_terms 69 | assert coef_names == self.true_coefs 70 | assert np.all(np.equal(cmat, self.true_cmat)) 71 | assert np.all(np.equal(dmat, self.true_dmat)) 72 | 73 | def test_constraint_system_dict(self): 74 | formula = "~0 + condition + batch" 75 | sample_description = pd.DataFrame({"condition": [0, 1, 1, 0, 0, 1], "batch": [1, 2, 2, 0, 0, 3]}) 76 | constraints = {"batch": "condition"} 77 | self.execute_test_dict(constraints, sample_description=sample_description, formula=formula) 78 | 79 | # list tests 80 | 81 | def execute_test_list(self, *args, **kwargs): 82 | dmat, coef_names, cmat, term_names = constraint_system_from_star(*args, 
**kwargs) 83 | assert term_names == self.true_terms_list 84 | assert coef_names == self.true_coefs_list 85 | assert np.all(np.equal(cmat, self.true_cmat_list)) 86 | assert np.all(np.equal(dmat, self.true_dmat_list)) 87 | 88 | def test_constraint_system_list(self): 89 | formula = "~0 + condition + batch" 90 | sample_description = pd.DataFrame({"condition": [0, 1, 1, 0, 0, 1], "batch": [1, 2, 2, 0, 0, 3]}) 91 | constraints = ["condition[0] + condition[1] = 0"] 92 | self.execute_test_list(constraints, sample_description=sample_description, formula=formula) 93 | 94 | def test_constraint_system_list_with_dmat(self): 95 | constraints = ["condition[0] + condition[1] = 0"] 96 | dmat = pd.DataFrame(self.true_dmat_list, columns=self.true_coefs_list) 97 | self.execute_test_list(constraints, dmat=dmat) 98 | 99 | # array tests 100 | 101 | def execute_test_array(self, *args, **kwargs): 102 | dmat, coef_names, cmat, term_names = constraint_system_from_star(*args, **kwargs) 103 | assert term_names == self.true_terms_array 104 | assert coef_names == self.true_coefs_array 105 | assert np.all(np.equal(cmat, self.true_cmat_array)) 106 | assert np.all(np.equal(dmat, self.true_dmat_array)) 107 | 108 | def test_constraint_system_array(self): 109 | formula = "~0 + condition + batch" 110 | sample_description = pd.DataFrame({"condition": [0, 1, 0, 1], "batch": [1, 0, 0, 1]}) 111 | constraints = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) 112 | self.execute_test_array(constraints, sample_description=sample_description, formula=formula) 113 | 114 | def test_constraint_system_array_with_dmat(self): 115 | constraints = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) 116 | dmat = pd.DataFrame(self.true_dmat_array, columns=self.true_coefs_array) 117 | self.execute_test_array(constraints, dmat=dmat) 118 | 119 | 120 | if __name__ == "__main__": 121 | unittest.main() 122 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | """Test cases for the __main__ module.""" 2 | import pytest 3 | from click.testing import CliRunner 4 | 5 | from batchglm import __main__ 6 | 7 | 8 | @pytest.fixture 9 | def runner() -> CliRunner: 10 | """Fixture for invoking command-line interfaces.""" 11 | return CliRunner() 12 | 13 | 14 | def test_main_succeeds(runner: CliRunner) -> None: 15 | """It exits with a status code of zero.""" 16 | result = runner.invoke(__main__.main) 17 | assert result.exit_code == 0 18 | -------------------------------------------------------------------------------- /tests/test_types_dmat.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from typing import List, Union 3 | 4 | import dask.array 5 | import numpy as np 6 | import pandas as pd 7 | import patsy 8 | 9 | from batchglm.utils.input import parse_design 10 | 11 | 12 | def check_np_dask(dmat: Union[np.ndarray, dask.array.core.Array], params: List[str]) -> bool: 13 | parse_design(design_matrix=dmat, param_names=params) 14 | try: # must produce ValueError 15 | parse_design(design_matrix=dmat, param_names=None) 16 | return False 17 | except ValueError as ve: 18 | if str(ve) != "Provide names when passing design_matrix as np.ndarray or dask.array.core.Array!": 19 | raise 20 | try: # must result in AssertionError 21 | parse_design(design_matrix=dmat, param_names=params[:-1]) 22 | return False 23 | except AssertionError as ae: 24 | if not ( 25 | str(ae) == "Length of provided 
param_names is not equal to number of coefficients in design_matrix." 26 | or str(ae).startswith("Datatype for design_matrix not understood") 27 | ): 28 | raise 29 | return True 30 | 31 | 32 | def check_pd_patsy(dmat: Union[pd.DataFrame, patsy.design_info.DesignMatrix], params: List[str]) -> bool: 33 | _, ret_params = parse_design(design_matrix=dmat, param_names=None) 34 | if ret_params != params: 35 | return False 36 | 37 | # generate new coefs to test ignoring passed params 38 | new_coef_list = ["a", "b", "c"] 39 | 40 | # param_names should be ignored 41 | _, ret_params = parse_design(design_matrix=dmat, param_names=new_coef_list) 42 | if params != ret_params: 43 | return False 44 | # param_names should be ignored 45 | _, ret_params = parse_design(design_matrix=dmat, param_names=new_coef_list[:-1]) 46 | if params != ret_params: 47 | return False 48 | return True 49 | 50 | 51 | class TestParseDesign(unittest.TestCase): 52 | """ 53 | Test various input data types for parsing of design and constraint matrices. 54 | The method "parse_design" in batchglm.utils.input must return Tuple[np.ndarray, List[str]]. 55 | It must fail if no param_names are passed or the length of param_names is not equal to the length of params. 56 | """ 57 | 58 | def test_parse_design(self) -> None: 59 | # create artificial data 60 | obs, coef = (500, 3) 61 | dmat = np.zeros(shape=(obs, coef)) 62 | coef_list = ["Intercept", "coef_0", "coef_1"] 63 | 64 | # assert on each check: unittest ignores plain return values, so returning False would never fail the test 65 | assert check_np_dask(dmat=dmat, params=coef_list) 66 | assert check_np_dask(dmat=dask.array.from_array(dmat, chunks=(1000, 1000)), params=coef_list) 67 | 68 | # check pd 69 | pd_coef = pd.DataFrame(dmat, columns=coef_list) 70 | assert check_pd_patsy(dmat=pd_coef, params=coef_list) 71 | 72 | # check patsy 73 | assert check_pd_patsy(dmat=patsy.dmatrix("~1 + coef_0 + coef_1", pd_coef), params=coef_list) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | --------------------------------------------------------------------------------