├── .bandit.yml ├── .cookietemple.yml ├── .darglint ├── .editorconfig ├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── general_question.md ├── dependabot.yml ├── labels.yml ├── pull_request_template.md ├── release-drafter.yml └── workflows │ ├── build_package.yml │ ├── labeler.yml │ ├── main_master_branch_protection.yml │ ├── publish_docs.yml │ ├── publish_package.yml │ ├── release-drafter.yml │ ├── run_cookietemple_lint.yml │ ├── run_tests.yml │ └── sync_project.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierignore ├── .readthedocs.yml ├── CODE_OF_CONDUCT.rst ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── README.rst ├── batchglm ├── __init__.py ├── __main__.py ├── _version.py ├── api │ └── __init__.py ├── log_cfg.py ├── models │ ├── __init__.py │ ├── base_glm │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ ├── glm_beta │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ ├── glm_nb │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ ├── glm_norm │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py │ └── glm_poisson │ │ ├── __init__.py │ │ ├── external.py │ │ ├── model.py │ │ └── utils.py ├── pkg_constants.py ├── py.typed ├── train │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── estimator.py │ │ └── model_container.py │ └── numpy │ │ ├── __init__.py │ │ ├── base_glm │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── external.py │ │ ├── model_container.py │ │ └── training_strategies.py │ │ ├── glm_nb │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── external.py │ │ └── model_container.py │ │ ├── glm_norm │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── external.py │ │ ├── model_container.py │ │ └── utils.py │ │ └── glm_poisson │ │ ├── __init__.py │ │ ├── estimator.py │ │ ├── exceptions.py │ │ ├── external.py │ │ └── model_container.py └── utils │ ├── __init__.py │ ├── data.py │ ├── input.py │ ├── linalg.py │ └── plotting.py ├── codecov.yml ├── cookietemple.cfg ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ └── custom_cookietemple.css ├── api │ ├── .gitignore │ └── index.rst ├── authors.rst ├── code_of_conduct.rst ├── conf.py ├── contributing.rst ├── index.rst ├── installation.rst ├── make.bat ├── readme.rst ├── reference.rst ├── references.rst ├── requirements.txt ├── tutorials.rst └── usage.rst ├── makefiles ├── Linux.mk └── Windows.mk ├── noxfile.py ├── poetry.lock ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── numpy │ ├── test_accuracy.py │ ├── test_accuracy_extreme_values.py │ └── utils.py ├── run_data_utils_test.py ├── test_main.py └── test_types_dmat.py └── versioneer.py /.bandit.yml: -------------------------------------------------------------------------------- 1 | # (optional) list included tests here: 2 | tests: [] 3 | 4 | # (optional) list skipped tests here: 5 | skips: ["B403", "B404", "B603", "B607"] 6 | -------------------------------------------------------------------------------- /.cookietemple.yml: -------------------------------------------------------------------------------- 1 | cookietemple_version: '1.3.11 # <>' 2 | domain: cli 3 | language: python 4 | project_slug: batchglm 5 | project_slug_no_hyphen: batchglm 6 | template_version: '2.0.2 # <>' 7 | template_handle: cli-python 8 | github_username: theislab 9 | creator_github_username: picciama 10 | is_github_repo: true 11 | is_repo_private: false 
12 | is_github_orga: true 13 | github_orga: theislab 14 | full_name: Mario Picciani 15 | email: mario.picciani@tum.de 16 | project_name: batchglm 17 | project_short_description: batchglm. A cookietemple based . 18 | version: 0.7.4 19 | license: BSD 20 | -------------------------------------------------------------------------------- /.darglint: -------------------------------------------------------------------------------- 1 | [darglint] 2 | strictness = short 3 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = B,B9,C,E,F,N,RST,S,W 3 | ignore = C901,E203,F401,RST201,RST301,S101,W503 4 | max-line-length = 120 5 | max-complexity = 10 6 | docstring-convention = all 7 | docstring_style=sphinx 8 | per-file-ignores = 9 | tests/*:S101 10 | versioneer.py:B,B9,C,N,RST202,S404,S603,W605 11 | batchglm/_version.py:B,C,N,S404,S603 12 | batchglm/utils/linalg.py:N803 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | batchglm/_version.py export-subst 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report to help us improve 4 | title: "Bug Summary" 5 | labels: "bug" 6 | assignees: "" 7 | --- 8 | 9 | **Describe the bug** 10 | 11 | 12 | 13 | **To Reproduce** 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 2. ... 19 | 3. ... 20 | 21 | **Expected behavior** 22 | 23 | 24 | 25 | **System [please complete the following information]:** 26 | 27 | - OS: e.g. [Ubuntu 18.04] 28 | - Language Version: [e.g. Python 3.8] 29 | - Virtual environment: [e.g. Conda] 30 | 31 | **Additional context** 32 | 33 | 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a new feature 4 | title: "Feature Request Summary" 5 | labels: "enhancement" 6 | assignees: "" 7 | --- 8 | 9 | **Is your feature request related to a problem? 
Please describe.** 10 | 11 | 12 | 13 | **Describe the solution you would like** 14 | 15 | 16 | 17 | **Additional context** 18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General question 3 | about: Ask a question about anything related to this project 4 | title: "Question" 5 | labels: "question" 6 | assignees: "" 7 | --- 8 | 9 | **Question** 10 | 11 | 13 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 3 8 | target-branch: development 9 | labels: 10 | - DEPENDABOT 11 | commit-message: 12 | prefix: "[DEPENDABOT]" 13 | 14 | - package-ecosystem: pip 15 | directory: "/.github/workflows" 16 | schedule: 17 | interval: weekly 18 | open-pull-requests-limit: 3 19 | target-branch: development 20 | labels: 21 | - DEPENDABOT 22 | commit-message: 23 | prefix: "[DEPENDABOT]" 24 | 25 | - package-ecosystem: pip 26 | directory: "/docs" 27 | schedule: 28 | interval: weekly 29 | open-pull-requests-limit: 3 30 | target-branch: development 31 | labels: 32 | - DEPENDABOT 33 | commit-message: 34 | prefix: "[DEPENDABOT]" 35 | 36 | - package-ecosystem: pip 37 | directory: "/" 38 | schedule: 39 | interval: weekly 40 | open-pull-requests-limit: 3 41 | target-branch: development 42 | labels: 43 | - DEPENDABOT 44 | commit-message: 45 | prefix: "[DEPENDABOT]" 46 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Label names are important as they are used by Release Drafter to decide 3 | # where to record them in the changelog or whether to skip them. 4 | # 5 | # The repository labels will be automatically configured using this file and 6 | # the GitHub Action https://github.com/marketplace/actions/github-labeler.
7 | - name: breaking 8 | description: Breaking Changes 9 | color: bfd4f2 10 | - name: bug 11 | description: Something isn't working 12 | color: d73a4a 13 | - name: build 14 | description: Build System and Dependencies 15 | color: bfdadc 16 | - name: ci 17 | description: Continuous Integration 18 | color: 4a97d6 19 | - name: dependencies 20 | description: Pull requests that update a dependency file 21 | color: 0366d6 22 | - name: documentation 23 | description: Improvements or additions to documentation 24 | color: 0075ca 25 | - name: duplicate 26 | description: This issue or pull request already exists 27 | color: cfd3d7 28 | - name: enhancement 29 | description: New feature or request 30 | color: a2eeef 31 | - name: github_actions 32 | description: Pull requests that update Github_actions code 33 | color: "000000" 34 | - name: good first issue 35 | description: Good for newcomers 36 | color: 7057ff 37 | - name: help wanted 38 | description: Extra attention is needed 39 | color: 008672 40 | - name: invalid 41 | description: This doesn't seem right 42 | color: e4e669 43 | - name: performance 44 | description: Performance 45 | color: "016175" 46 | - name: python 47 | description: Pull requests that update Python code 48 | color: 2b67c6 49 | - name: question 50 | description: Further information is requested 51 | color: d876e3 52 | - name: refactoring 53 | description: Refactoring 54 | color: ef67c4 55 | - name: removal 56 | description: Removals and Deprecations 57 | color: 9ae7ea 58 | - name: style 59 | description: Style 60 | color: c120e5 61 | - name: testing 62 | description: Testing 63 | color: b1fc6f 64 | - name: wontfix 65 | description: This will not be worked on 66 | color: ffffff 67 | - name: skip-changelog 68 | description: Changes that should be omitted from the release notes 69 | color: ededed 70 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | **PR Checklist** 4 | 5 | 6 | 7 | - [ ] This comment contains a description of changes (with reason) 8 | - [ ] Referenced issue is linked 9 | - [ ] If you've fixed a bug or added code that should be tested, add tests! 
10 | - [ ] Documentation in `docs` is updated 11 | 12 | **Description of changes** 13 | 14 | 15 | 16 | **Technical details** 17 | 18 | 19 | 20 | **Additional context** 21 | 22 | 23 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: "0.7.4 🌈" # <> 2 | tag-template: 0.7.4 # <> 3 | exclude-labels: 4 | - "skip-changelog" 5 | 6 | categories: 7 | - title: "🚀 Features" 8 | labels: 9 | - feature 10 | - enhancement 11 | - title: "🐛 Bug Fixes" 12 | labels: 13 | - fix 14 | - bugfix 15 | - bug 16 | - title: "🧰 Maintenance" 17 | label: chore 18 | - title: ":package: Dependencies" 19 | labels: 20 | - dependencies 21 | - build 22 | - dependabot 23 | - DEPENDABOT 24 | version-resolver: 25 | major: 26 | labels: 27 | - major 28 | minor: 29 | labels: 30 | - minor 31 | patch: 32 | labels: 33 | - patch 34 | default: patch 35 | autolabeler: 36 | - label: chore 37 | files: 38 | - "*.md" 39 | branch: 40 | - '/docs{0,1}\/.+/' 41 | - label: bug 42 | branch: 43 | - /fix\/.+/ 44 | title: 45 | - /fix/i 46 | - label: enhancement 47 | branch: 48 | - /feature\/.+/ 49 | body: 50 | - "/JIRA-[0-9]{1,4}/" 51 | template: | 52 | ## Changes 53 | 54 | $CHANGES 55 | -------------------------------------------------------------------------------- /.github/workflows/build_package.yml: -------------------------------------------------------------------------------- 1 | name: Build batchglm Package 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" 9 | strategy: 10 | matrix: 11 | os: [macos-latest, ubuntu-latest, windows-latest] 12 | python: [3.8, 3.9] 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | name: Check out source-code repository 17 | 18 | - name: Setup Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: ${{ matrix.python }} 22 | 23 | - name: Install Poetry 24 | run: | 25 | pip install poetry 26 | poetry --version 27 | 28 | - name: Build package 29 | run: poetry build --ansi 30 | 31 | - name: Install required twine packaging dependencies 32 | run: pip install setuptools wheel twine 33 | 34 | - name: Check twine package 35 | run: twine check dist/* 36 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: Labeler 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | 9 | jobs: 10 | labeler: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out the repository 14 | uses: actions/checkout@v2.3.3 15 | 16 | - name: Run Labeler 17 | uses: crazy-max/ghaction-github-labeler@v3.1.1 18 | with: 19 | skip-delete: true 20 | -------------------------------------------------------------------------------- /.github/workflows/main_master_branch_protection.yml: -------------------------------------------------------------------------------- 1 | name: PR to master branch from patch/release branch only 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | - main 8 | 9 | jobs: 10 | check_target: 11 | runs-on: ubuntu-latest 12 | name: Check Target branch 13 | steps: 14 | # PRs to the repository master branch are only ok if coming from any patch or release branch 15 | - name: Check PRs 16 | run: | 17 | { [[ $GITHUB_HEAD_REF = 
*"release"* ]]; } || [[ $GITHUB_HEAD_REF == *"patch"* ]] 18 | 19 | # If the above check failed, post a comment on the PR explaining the failure 20 | # NOTE - this may not work if the PR is coming from a fork, due to limitations in GitHub actions secrets 21 | - name: Post PR comment 22 | if: failure() 23 | uses: mshick/add-pr-comment@v1 24 | with: 25 | message: | 26 | Hi @${{ github.event.pull_request.user.login }}, 27 | 28 | It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` or `main` branch. 29 | The `master`/`main` branch should always contain code from the latest release. 30 | Because of this, PRs to `master`/`main` are only allowed if they come from any ${{github.event.pull_request.head.repo.full_name}} `release` or `patch` branch. 31 | 32 | You do not need to close this PR, you can change the target branch to `development` by clicking the _"Edit"_ button at the top of this page. 33 | 34 | Thanks again for your contribution! 35 | repo-token: ${{ secrets.GITHUB_TOKEN }} 36 | allow-repeats: false 37 | 38 | check_version: 39 | name: No SNAPSHOT version on master branch 40 | runs-on: ubuntu-latest 41 | steps: 42 | - name: Set up Python 43 | uses: actions/setup-python@v2 44 | with: 45 | python-version: "3.8" 46 | # PRs to the repository master branch are only ok if coming from any patch or release branch 47 | - name: Install mlf-core 48 | run: pip install mlf-core 49 | 50 | - name: Check project version 51 | run: | 52 | PROJECTVERSION=$(mlf-core bump-version --project-version . | tail -n1) 53 | echo $PROJECTVERSION; 54 | if [[ $PROJECTVERSION == *"SNAPSHOT"* ]];then 55 | exit -1 56 | else 57 | exit 0 58 | fi 59 | 60 | # If the above check failed, post a comment on the PR explaining the failure 61 | # NOTE - this may not work if the PR is coming from a fork, due to limitations in GitHub actions secrets 62 | - name: Post PR comment 63 | if: failure() 64 | uses: mshick/add-pr-comment@v1 65 | with: 66 | message: | 67 | Hi @${{ github.event.pull_request.user.login }}, 68 | 69 | It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master`/`main` branch. 70 | A version check determined that you are using a SNAPSHOT version. 71 | The `master`/`main` branch should never have any SNAPSHOT versions, since only fully stable code should be on the `master`/`main` branch. 
72 | repo-token: ${{ secrets.GITHUB_TOKEN }} 73 | allow-repeats: false 74 | -------------------------------------------------------------------------------- /.github/workflows/publish_docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Documentation 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | name: Check out source-code repository 12 | 13 | - name: Setup Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.8 17 | 18 | - name: Install pip 19 | run: | 20 | python -m pip install --upgrade pip 21 | 22 | - name: Install doc dependencies 23 | run: | 24 | pip install -r docs/requirements.txt 25 | 26 | - name: Build docs 27 | run: | 28 | cd docs 29 | make html 30 | 31 | - name: Deploy 32 | if: ${{ github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'}} 33 | uses: peaceiris/actions-gh-pages@v3 34 | with: 35 | github_token: ${{ secrets.GITHUB_TOKEN }} 36 | publish_dir: ./docs/_build/html 37 | -------------------------------------------------------------------------------- /.github/workflows/publish_package.yml: -------------------------------------------------------------------------------- 1 | name: Publish batchglm to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release: 9 | name: Release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out the repository 13 | uses: actions/checkout@v2.3.4 14 | with: 15 | fetch-depth: 2 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v2.1.4 19 | with: 20 | python-version: "3.9" 21 | 22 | - name: Install Poetry 23 | run: | 24 | pip install poetry 25 | poetry --version 26 | 27 | - name: Build package 28 | run: | 29 | poetry build --ansi 30 | 31 | - name: Publish package on PyPI 32 | uses: pypa/gh-action-pypi-publish@v1.4.2 33 | with: 34 | # TODO COOKIETEMPLE: Configure your PyPI Token to enable automatic deployment to PyPi on releases 35 | # https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets 36 | user: __token__ 37 | password: ${{ secrets.PYPI_TOKEN }} 38 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | on: 3 | push: 4 | branches: 5 | - development 6 | pull_request: 7 | branches: 8 | - development 9 | types: 10 | - opened 11 | - reopened 12 | - synchronize 13 | jobs: 14 | update_release_draft: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: release-drafter/release-drafter@v5 18 | env: 19 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 20 | -------------------------------------------------------------------------------- /.github/workflows/run_cookietemple_lint.yml: -------------------------------------------------------------------------------- 1 | name: cookietemple lint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | run: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | name: Check out source-code repository 12 | 13 | - name: Setup Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.9 17 | 18 | - name: Install cookietemple 19 | run: pip install cookietemple==1.3.11 20 | 21 | - name: Run cookietemple lint 22 | run: cookietemple lint . 
23 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run batchglm Tests 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | tests: 9 | name: ${{ matrix.session }} ${{ matrix.python-version }} / ${{ matrix.os }} 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | - { 16 | python-version: 3.8, 17 | os: ubuntu-latest, 18 | session: "pre-commit", 19 | } 20 | - { 21 | python-version: 3.8, 22 | os: ubuntu-latest, 23 | session: "safety", 24 | } 25 | - { 26 | python-version: 3.8, 27 | os: ubuntu-latest, 28 | session: "mypy", 29 | } 30 | - { 31 | python-version: 3.8, 32 | os: ubuntu-latest, 33 | session: "tests", 34 | } 35 | - { 36 | python-version: 3.8, 37 | os: windows-latest, 38 | session: "tests", 39 | } 40 | - { 41 | python-version: 3.8, 42 | os: macos-latest, 43 | session: "tests", 44 | } 45 | - { 46 | python-version: 3.8, 47 | os: ubuntu-latest, 48 | session: "typeguard", 49 | } 50 | - { 51 | python-version: 3.8, 52 | os: ubuntu-latest, 53 | session: "xdoctest", 54 | } 55 | - { 56 | python-version: 3.8, 57 | os: ubuntu-latest, 58 | session: "docs-build", 59 | } 60 | 61 | env: 62 | NOXSESSION: ${{ matrix.session }} 63 | 64 | steps: 65 | - name: Check out the repository 66 | uses: actions/checkout@v2.3.4 67 | 68 | - name: Set up Python ${{ matrix.python-version }} 69 | uses: actions/setup-python@v2.2.2 70 | with: 71 | python-version: ${{ matrix.python-version }} 72 | 73 | - name: Install Poetry 74 | run: | 75 | pipx install poetry 76 | poetry --version 77 | 78 | - name: Install nox nox-poetry rich 79 | run: | 80 | pipx install nox 81 | pipx inject nox nox-poetry 82 | pipx inject nox rich 83 | nox --version 84 | 85 | - name: Compute pre-commit cache key 86 | if: matrix.session == 'pre-commit' 87 | id: pre-commit-cache 88 | shell: python 89 | run: | 90 | import hashlib 91 | import sys 92 | 93 | python = "py{}.{}".format(*sys.version_info[:2]) 94 | payload = sys.version.encode() + sys.executable.encode() 95 | digest = hashlib.sha256(payload).hexdigest() 96 | result = "${{ runner.os }}-{}-{}-pre-commit".format(python, digest[:8]) 97 | 98 | print("::set-output name=result::{}".format(result)) 99 | 100 | - name: Restore pre-commit cache 101 | uses: actions/cache@v2.1.6 102 | if: matrix.session == 'pre-commit' 103 | with: 104 | path: ~/.cache/pre-commit 105 | key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }} 106 | restore-keys: | 107 | ${{ steps.pre-commit-cache.outputs.result }}- 108 | 109 | - name: Run Nox 110 | run: nox --force-color --python=${{ matrix.python-version }} 111 | 112 | - name: Upload coverage data 113 | if: always() && matrix.session == 'tests' 114 | uses: "actions/upload-artifact@v2.2.3" 115 | with: 116 | name: coverage-data 117 | path: ".coverage.*" 118 | 119 | - name: Upload documentation 120 | if: matrix.session == 'docs-build' 121 | uses: actions/upload-artifact@v2.2.4 122 | with: 123 | name: docs 124 | path: docs/_build 125 | 126 | coverage: 127 | runs-on: ubuntu-latest 128 | needs: tests 129 | steps: 130 | - name: Check out the repository 131 | uses: actions/checkout@v2.3.4 132 | 133 | - name: Set up Python 3.8 134 | uses: actions/setup-python@v2.2.2 135 | with: 136 | python-version: 3.8 137 | 138 | - name: Install Poetry 139 | run: | 140 | pipx install poetry 141 | poetry --version 142 | 143 | - name: Install nox 
nox-poetry rich 144 | run: | 145 | pipx install nox 146 | pipx inject nox nox-poetry 147 | pipx inject nox rich 148 | nox --version 149 | 150 | - name: Download coverage data 151 | uses: actions/download-artifact@v2.0.10 152 | with: 153 | name: coverage-data 154 | 155 | - name: Combine coverage data and display human readable report 156 | run: nox --force-color --session=coverage 157 | 158 | - name: Create coverage report 159 | run: nox --force-color --session=coverage -- xml -i 160 | 161 | - name: Upload coverage report 162 | uses: codecov/codecov-action@v2.1.0 163 | -------------------------------------------------------------------------------- /.github/workflows/sync_project.yml: -------------------------------------------------------------------------------- 1 | name: cookietemple sync 2 | 3 | on: 4 | schedule: 5 | - cron: "0 1 * * *" # 1 am UTC 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Setup Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: 3.8 16 | 17 | - name: Install cookietemple 18 | run: pip install cookietemple 19 | 20 | - uses: actions/checkout@v2 21 | with: 22 | fetch-depth: 0 23 | token: "${{ secrets.CT_SYNC_TOKEN }}" 24 | name: Check out source-code repository 25 | 26 | - uses: oleksiyrudenko/gha-git-credentials@v2.1 27 | with: 28 | name: "picciama" 29 | email: "mario.picciani@tum.de" 30 | actor: "picciama" 31 | token: "${{ secrets.CT_SYNC_TOKEN}}" 32 | 33 | - name: Sync project 34 | run: cookietemple sync . ${{ secrets.CT_SYNC_TOKEN }} zethson 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cache 2 | data 3 | config.ini 4 | .metadata 5 | .Rhistory 6 | playground/* 7 | resources/* 8 | **/__pycache__ 9 | **/.DS_Store 10 | #**/*.ipynb 11 | tutorials 12 | 13 | !**/.gitignore 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | pip-wheel-metadata/ 38 | share/python-wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | *.py,cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | db.sqlite3-journal 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | .pytype/ 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # Jetbrains IDE 149 | .idea/ 150 | 151 | # Coala 152 | *.orig 153 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: black 5 | name: black 6 | entry: black 7 | language: system 8 | types: [python] 9 | require_serial: true 10 | - id: check-added-large-files 11 | name: Check for added large files 12 | entry: check-added-large-files 13 | language: system 14 | - id: check-toml 15 | name: Check Toml 16 | entry: check-toml 17 | language: system 18 | types: [toml] 19 | - id: check-yaml 20 | name: Check Yaml 21 | entry: check-yaml 22 | language: system 23 | types: [yaml] 24 | - id: end-of-file-fixer 25 | name: Fix End of Files 26 | entry: end-of-file-fixer 27 | language: system 28 | types: [text] 29 | stages: [commit, push, manual] 30 | exclude: docs/ 31 | - id: flake8 32 | name: flake8 33 | entry: flake8 34 | language: system 35 | types: [python] 36 | require_serial: true 37 | - id: trailing-whitespace 38 | name: Trim Trailing Whitespace 39 | entry: trailing-whitespace-fixer 40 | language: system 41 | types: [text] 42 | stages: [commit, push, manual] 43 | - repo: https://github.com/pre-commit/mirrors-prettier 44 | rev: v2.7.1 45 | hooks: 46 | - id: prettier 47 | - repo: https://github.com/pycqa/isort 48 | rev: 5.10.1 49 | hooks: 50 | - id: isort 51 | name: isort (python) 52 | args: ["--profile", "black"] 53 | - id: isort 54 | name: isort (cython) 55 | types: [cython] 56 | - id: isort 57 | name: isort (pyi) 58 | types: [pyi] 59 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | .cookietemple.yml 2 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | # Build documentation in the docs/ directory with Sphinx 7 | sphinx: 8 | configuration: docs/conf.py 9 | 10 | # Build documentation with MkDocs 11 | #mkdocs: 12 | # configuration: mkdocs.yml 13 | 14 | # Optionally build your docs in additional formats such as PDF and ePub 15 | formats: all 16 | 17 | # Optionally set the version of Python and requirements required to build your docs 18 | python: 19 | version: 3.8 20 | install: 21 | - requirements: docs/requirements.txt 
22 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | Our Pledge 5 | ---------- 6 | 7 | In the interest of fostering an open and welcoming environment, we as 8 | contributors and maintainers pledge to making participation in our 9 | project and our community a harassment-free experience for everyone, 10 | regardless of age, body size, disability, ethnicity, gender identity and 11 | expression, level of experience, nationality, personal appearance, race, 12 | religion, or sexual identity and orientation. 13 | 14 | Our Standards 15 | ------------- 16 | 17 | Examples of behavior that contributes to creating a positive environment 18 | include: 19 | 20 | - Using welcoming and inclusive language 21 | - Being respectful of differing viewpoints and experiences 22 | - Gracefully accepting constructive criticism 23 | - Focusing on what is best for the community 24 | - Showing empathy towards other community members 25 | 26 | Examples of unacceptable behavior by participants include: 27 | 28 | - The use of sexualized language or imagery and unwelcome sexual 29 | attention or advances 30 | - Trolling, insulting/derogatory comments, and personal or political 31 | attacks 32 | - Public or private harassment 33 | - Publishing others’ private information, such as a physical or 34 | electronic address, without explicit permission 35 | - Other conduct which could reasonably be considered inappropriate in a 36 | professional setting 37 | 38 | Our Responsibilities 39 | -------------------- 40 | 41 | Project maintainers are responsible for clarifying the standards of 42 | acceptable behavior and are expected to take appropriate and fair 43 | corrective action in response to any instances of unacceptable behavior. 44 | 45 | Project maintainers have the right and responsibility to remove, edit, 46 | or reject comments, commits, code, wiki edits, issues, and other 47 | contributions that are not aligned to this Code of Conduct, or to ban 48 | temporarily or permanently any contributor for other behaviors that they 49 | deem inappropriate, threatening, offensive, or harmful. 50 | 51 | Scope 52 | ----- 53 | 54 | This Code of Conduct applies both within project spaces and in public 55 | spaces when an individual is representing the project or its community. 56 | Examples of representing a project or community include using an 57 | official project e-mail address, posting via an official social media 58 | account, or acting as an appointed representative at an online or 59 | offline event. Representation of a project may be further defined and 60 | clarified by project maintainers. 61 | 62 | Enforcement 63 | ----------- 64 | 65 | Instances of abusive, harassing, or otherwise unacceptable behavior may 66 | be reported by opening an issue. The project team 67 | will review and investigate all complaints, and will respond in a way 68 | that it deems appropriate to the circumstances. The project team is 69 | obligated to maintain confidentiality with regard to the reporter of an 70 | incident. Further details of specific enforcement policies may be posted 71 | separately. 72 | 73 | Project maintainers who do not follow or enforce the Code of Conduct in 74 | good faith may face temporary or permanent repercussions as determined 75 | by other members of the project’s leadership. 
76 | 77 | Attribution 78 | ------------------- 79 | 80 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, 81 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.1-alpine 2 | 3 | # A few utilities needed to be able to install C-based libraries such as numpy 4 | RUN apk update 5 | RUN apk add make automake gcc g++ git 6 | 7 | RUN pip install batchglm 8 | 9 | CMD batchglm 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, David S. Fischer, Florian R. Hölzlwimmer. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include batchglm/_version.py 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(OS),Windows_NT) 2 | include makefiles/Windows.mk 3 | else 4 | include makefiles/Linux.mk 5 | endif 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fast and scalable fitting of over-determined generalized-linear models (GLMs) 2 | 3 | batchglm was developed in the context of [diffxpy](https://github.com/theislab/diffxpy) to allow fast model fitting for differential expression analysis of single-cell RNA-seq data. However, one can use batchglm or its concepts in other scenarios where over-determined GLMs are encountered.
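At its core, fitting one over-determined GLM per feature reduces to many regressions that share a single design matrix, so all features can be fit together. As a conceptual sketch only (plain `numpy`/`patsy`, not batchglm's own API; all names and sizes below are illustrative), the normal/identity special case looks like this:

```python
import numpy as np
import patsy

# 200 observations x 1000 features (e.g. genes), one shared design matrix.
n_obs, n_features = 200, 1000
y = np.random.rand(n_obs, n_features)
design = patsy.dmatrix("~ 1 + condition", {"condition": np.repeat(["a", "b"], n_obs // 2)})

# A single least-squares solve fits the coefficients of all 1000 models at once.
coef, *_ = np.linalg.lstsq(np.asarray(design), y, rcond=None)
print(coef.shape)  # (2, 1000): one coefficient vector per feature
```

batchglm generalizes this idea to non-normal noise models such as the negative binomial, beta, Poisson, and normal distributions (see `batchglm.models`).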
4 | To install the requirements: 5 | ``` 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | To run unit tests: 10 | 11 | ``` 12 | pip install -e . 13 | python -m unittest 14 | ``` 15 | 16 | 37 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | batchglm 2 | =========================== 3 | 4 | |PyPI| |Python Version| |License| |Read the Docs| |Build| |Tests| |Codecov| |pre-commit| |Black| 5 | 6 | .. |PyPI| image:: https://img.shields.io/pypi/v/batchglm.svg 7 | :target: https://pypi.org/project/batchglm/ 8 | :alt: PyPI 9 | .. |Python Version| image:: https://img.shields.io/pypi/pyversions/batchglm 10 | :target: https://pypi.org/project/batchglm 11 | :alt: Python Version 12 | .. |License| image:: https://img.shields.io/github/license/theislab/batchglm 13 | :target: https://opensource.org/licenses/BSD 14 | :alt: License 15 | .. |Read the Docs| image:: https://img.shields.io/readthedocs/batchglm/latest.svg?label=Read%20the%20Docs 16 | :target: https://batchglm.readthedocs.io/ 17 | :alt: Read the documentation at https://batchglm.readthedocs.io/ 18 | .. |Build| image:: https://github.com/theislab/batchglm/workflows/Build%20batchglm%20Package/badge.svg 19 | :target: https://github.com/theislab/batchglm/actions?workflow=Package 20 | :alt: Build Package Status 21 | .. |Tests| image:: https://github.com/theislab/batchglm/workflows/Run%20batchglm%20Tests/badge.svg 22 | :target: https://github.com/theislab/batchglm/actions?workflow=Tests 23 | :alt: Run Tests Status 24 | .. |Codecov| image:: https://codecov.io/gh/theislab/batchglm/branch/master/graph/badge.svg 25 | :target: https://codecov.io/gh/theislab/batchglm 26 | :alt: Codecov 27 | .. |pre-commit| image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white 28 | :target: https://github.com/pre-commit/pre-commit 29 | :alt: pre-commit 30 | .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg 31 | :target: https://github.com/psf/black 32 | :alt: Black 33 | 34 | 35 | Features 36 | -------- 37 | 38 | - Fit many GLMs (i.e. a batch!) all at once using `numpy` (coming soon: `tensorflow2` or `statsmodels`) with a simple API 39 | - Integrates with and provides utilities for working with familiar libraries like `patsy` and `dask`. 40 | 41 | Installation 42 | ------------ 43 | 44 | You can install *batchglm* via pip_ from PyPI_: 45 | 46 | .. code:: console 47 | 48 | $ pip install batchglm 49 | 50 | 51 | Usage 52 | ----- 53 | 54 | Please see the API documentation for details or the Jupyter notebook tutorials (TODO: need notebooks - separate docs?) 55 | 56 | 57 | Credits 58 | ------- 59 | 60 | This package was created with cookietemple_ using Cookiecutter_ based on Hypermodern_Python_Cookiecutter_. 61 | 62 | .. _cookietemple: https://cookietemple.com 63 | .. _Cookiecutter: https://github.com/audreyr/cookiecutter 64 | .. _PyPI: https://pypi.org/ 65 | .. _Hypermodern_Python_Cookiecutter: https://github.com/cjolowicz/cookiecutter-hypermodern-python 66 | .. _pip: https://pip.pypa.io/ 67 | .. _Usage: https://batchglm.readthedocs.io/en/latest/usage.html -------------------------------------------------------------------------------- /batchglm/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .
import models, pkg_constants, train, utils 4 | 5 | # from ._version import get_versions 6 | from .log_cfg import logger, setup_logging, unconfigure_logging 7 | 8 | # __version__ = _version.get_versions()["version"] 9 | # del get_versions 10 | 11 | # we need this for the sparse package, see https://github.com/pydata/sparse/issues/10 12 | os.environ["SPARSE_AUTO_DENSIFY"] = "1" 13 | -------------------------------------------------------------------------------- /batchglm/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Command-line interface.""" 3 | import click 4 | from rich import traceback 5 | 6 | 7 | @click.command() 8 | @click.version_option(version="0.7.4", message=click.style("batchglm Version: 0.7.4")) 9 | def main() -> None: 10 | """batchglm.""" 11 | 12 | 13 | if __name__ == "__main__": 14 | traceback.install() 15 | main(prog_name="batchglm") # pragma: no cover 16 | -------------------------------------------------------------------------------- /batchglm/api/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .. import models, pkg_constants, train, utils 4 | 5 | # from .._version import get_versions 6 | from ..log_cfg import logger, setup_logging, unconfigure_logging 7 | 8 | # __version__ = get_versions()["version"] 9 | # del get_versions 10 | 11 | # we need this for the sparse package, see https://github.com/pydata/sparse/issues/10 12 | os.environ["SPARSE_AUTO_DENSIFY"] = "1" 13 | -------------------------------------------------------------------------------- /batchglm/log_cfg.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | logger = logging.getLogger(".".join(__name__.split(".")[:-1])) 5 | 6 | _is_interactive = bool(getattr(sys, "ps1", sys.flags.interactive)) 7 | 8 | 9 | def unconfigure_logging(): 10 | if logger.hasHandlers(): 11 | for handler in logger.handlers: 12 | logger.removeHandler(handler) 13 | 14 | logger.setLevel(logging.NOTSET) 15 | 16 | 17 | def setup_logging(verbosity="WARNING", stream=None, format=logging.BASIC_FORMAT): 18 | unconfigure_logging() 19 | 20 | if isinstance(verbosity, str): 21 | verbosity = getattr(logging, verbosity) 22 | 23 | logger.setLevel(verbosity) 24 | 25 | if stream is not None: 26 | if isinstance(stream, str): 27 | if stream.lower() == "stdout": 28 | stream = sys.stdout 29 | elif stream.lower() == "stderr": 30 | stream = sys.stderr 31 | else: 32 | raise ValueError("Unknown stream %s" % stream) 33 | 34 | handler = logging.StreamHandler(stream) 35 | handler.setFormatter(logging.Formatter(format, None)) 36 | logger.addHandler(handler) 37 | 38 | 39 | # If we are in an interactive environment (like Jupyter), set loglevel to INFO and pipe the output to stdout. 40 | if _is_interactive: 41 | setup_logging(logging.INFO) 42 | else: 43 | setup_logging(logging.WARNING) 44 | -------------------------------------------------------------------------------- /batchglm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import base_glm, glm_beta, glm_nb, glm_norm, glm_poisson 2 | -------------------------------------------------------------------------------- /batchglm/models/base_glm/__init__.py: -------------------------------------------------------------------------------- 1 | # from .estimator import _EstimatorGLM 2 | from ...utils.input import InputDataGLM 3 | from .model import ModelGLM 4 | from .utils import closedform_glm_mean, closedform_glm_scale 5 | -------------------------------------------------------------------------------- /batchglm/models/base_glm/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.utils.linalg import groupwise_solve_lm 4 | -------------------------------------------------------------------------------- /batchglm/models/base_glm/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from typing import Callable, List, Optional, Tuple, Union 4 | 5 | import dask.array 6 | import numpy as np 7 | import pandas as pd 8 | import patsy 9 | import scipy.sparse 10 | import sparse 11 | 12 | from .external import groupwise_solve_lm 13 | 14 | logger = logging.getLogger("batchglm") 15 | 16 | 17 | def densify(arr): 18 | if isinstance(arr, dask.array.core.Array): 19 | arr = arr.compute() 20 | if isinstance(arr, sparse.COO) or isinstance(arr, scipy.sparse.csr_matrix): 21 | return arr.todense() 22 | else: 23 | return arr 24 | 25 | 26 | def generate_sample_description( 27 | num_observations: int, 28 | num_conditions: int, 29 | num_batches: int, 30 | intercept_scale: bool, 31 | shuffle_assignments: bool, 32 | ) -> Tuple[patsy.DesignMatrix, patsy.DesignMatrix, pd.DataFrame]: 33 | """Build a sample description. 34 | 35 | :param num_observations: Number of observations to simulate. 36 | :param num_conditions: number of conditions; will be repeated like [1,2,3,1,2,3] 37 | :param num_batches: number of batches; will be repeated like [1,1,2,2,3,3] 38 | :param intercept_scale: If true, returns a single-coefficient design matrix (formula = "~1"). 39 | If false, returns a design matrix identical to the loc model. 40 | :param shuffle_assignments: If true, shuffle the assignments in the xarray. 41 | UNSUPPORTED: Must be removed as it is dysfunctional!!!
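A minimal usage sketch (argument values are illustrative)::

    design_loc, design_scale, sample_description = generate_sample_description(
        num_observations=6,
        num_conditions=2,
        num_batches=3,
        intercept_scale=True,
        shuffle_assignments=False,
    )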
42 | """ 43 | if num_conditions == 0: 44 | num_conditions = 1 45 | if num_batches == 0: 46 | num_batches = 1 47 | 48 | # condition column 49 | reps_conditions = math.ceil(num_observations / num_conditions) 50 | conditions = np.squeeze(np.tile([np.arange(num_conditions)], reps_conditions)) 51 | conditions = conditions[range(num_observations)].astype(str) 52 | 53 | # batch column 54 | reps_batches = math.ceil(num_observations / num_batches) 55 | batches = np.repeat(range(num_batches), reps_batches) 56 | batches = batches[range(num_observations)].astype(str) 57 | sample_description = pd.DataFrame({"condition": conditions, "batch": batches}) 58 | 59 | if shuffle_assignments: 60 | sample_description = sample_description.isel( 61 | observations=np.random.permutation(sample_description.observations.values) 62 | ) 63 | 64 | sim_design_loc = patsy.dmatrix("~1+condition+batch", sample_description) 65 | 66 | if intercept_scale: 67 | sim_design_scale = patsy.dmatrix("~1", sample_description) 68 | else: 69 | sim_design_scale = sim_design_loc 70 | 71 | return sim_design_loc, sim_design_scale, sample_description 72 | 73 | 74 | def closedform_glm_mean( 75 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 76 | dmat: Union[np.ndarray, dask.array.core.Array], 77 | constraints: Optional[Union[np.ndarray, dask.array.core.Array]] = None, 78 | size_factors: Optional[np.ndarray] = None, 79 | link_fn: Optional[Callable] = None, 80 | inv_link_fn: Optional[Callable] = None, 81 | ): 82 | r""" 83 | Calculate a closed-form solution for the mean parameters of GLMs. 84 | 85 | :param x: The input data array 86 | :param dmat: some design matrix 87 | :param constraints: tensor (all parameters x dependent parameters) 88 | Tensor that encodes how complete parameter set which includes dependent 89 | parameters arises from indepedent parameters: all = . 90 | This form of constraints is used in vector generalized linear models (VGLMs). 91 | :param size_factors: size factors for X 92 | :param link_fn: linker function for GLM 93 | :param inv_link_fn: inverse linker function for GLM 94 | :return: tuple: (groupwise_means, mu, rmsd) 95 | """ 96 | if size_factors is not None: 97 | x = np.divide(x, size_factors) 98 | 99 | def apply_fun(grouping): 100 | 101 | groupwise_means = np.asarray( 102 | np.vstack([np.mean(densify(x[np.where(grouping == g)[0], :]), axis=0) for g in np.unique(grouping)]) 103 | ) 104 | if link_fn is None: 105 | return groupwise_means 106 | else: 107 | return link_fn(groupwise_means) 108 | 109 | linker_groupwise_means, mu, rmsd, rank, s = groupwise_solve_lm( 110 | dmat=dmat, apply_fun=apply_fun, constraints=constraints 111 | ) 112 | if inv_link_fn is not None: 113 | return inv_link_fn(linker_groupwise_means), mu, rmsd 114 | else: 115 | return linker_groupwise_means, mu, rmsd 116 | 117 | 118 | def closedform_glm_scale( 119 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 120 | design_scale: Union[np.ndarray, dask.array.core.Array], 121 | constraints: Optional[Union[np.ndarray, dask.array.core.Array]] = None, 122 | size_factors: Optional[np.ndarray] = None, 123 | groupwise_means: Optional[np.ndarray] = None, 124 | link_fn: Optional[Callable] = None, 125 | inv_link_fn: Optional[Callable] = None, 126 | compute_scales_fun: Optional[Callable] = None, 127 | ): 128 | r""" 129 | Calculate a closed-form solution for the scale parameters of GLMs. 
130 | 131 | :param x: The sample data 132 | :param design_scale: design matrix for scale 133 | :param constraints: design constraints for the scale model 134 | :param size_factors: size factors for X 135 | :param groupwise_means: optional; if already computed, these can be passed in to avoid recomputation 136 | :param compute_scales_fun: optional function mapping (variance, mean) to the group-wise scale parameter 137 | :param inv_link_fn: inverse link function of the GLM 138 | :param link_fn: link function of the GLM 139 | :return: tuple (groupwise_scales, logphi, rmsd) 140 | """ 141 | if size_factors is not None: 142 | x = x / size_factors 143 | 144 | # to circumvent nonlocal error 145 | provided_groupwise_means = groupwise_means 146 | 147 | def apply_fun(grouping): 148 | # Calculate group-wise means if not supplied. These are required for variance and MME computation. 149 | if provided_groupwise_means is None: 150 | gw_means = np.asarray( 151 | np.vstack([np.mean(densify(x[np.where(grouping == g)[0], :]), axis=0) for g in np.unique(grouping)]) 152 | ) 153 | else: 154 | gw_means = provided_groupwise_means 155 | 156 | # Estimate the group-wise variance via the moment identity Var[x] = E[x^2] - (E[x])^2. 157 | if isinstance(x, scipy.sparse.csr_matrix): 158 | expect_xsq = np.asarray( 159 | np.vstack( 160 | [ 161 | np.asarray(np.mean(densify(x[np.where(grouping == g)[0], :]).power(2), axis=0)) 162 | for g in np.unique(grouping) 163 | ] 164 | ) 165 | ) 166 | else: 167 | expect_xsq = np.vstack( 168 | [np.mean(np.square(densify(x[np.where(grouping == g)[0], :])), axis=0) for g in np.unique(grouping)] 169 | ) 170 | expect_x_sq = np.square(gw_means)  # (E[x])^2 171 | variance = expect_xsq - expect_x_sq 172 | 173 | if compute_scales_fun is not None: 174 | groupwise_scales = compute_scales_fun(variance, gw_means) 175 | else: 176 | groupwise_scales = variance 177 | 178 | if link_fn is not None: 179 | return link_fn(groupwise_scales) 180 | else: 181 | return groupwise_scales 182 | 183 | linker_groupwise_scales, scaleparam, rmsd, rank, _ = groupwise_solve_lm( 184 | dmat=design_scale, apply_fun=apply_fun, constraints=constraints 185 | ) 186 | if inv_link_fn is not None: 187 | return inv_link_fn(linker_groupwise_scales), scaleparam, rmsd 188 | else: 189 | return linker_groupwise_scales, scaleparam, rmsd 190 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 11 | """ 12 | Generalized Linear Model (GLM) with beta-distributed noise, logit link for location and log link for scale.
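Concretely, ``mean = 1 / (1 + exp(-eta_loc))`` and ``samplesize = exp(eta_scale)``, matching the ``inverse_link_loc`` and ``inverse_link_scale`` methods below.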
13 | """ 14 | 15 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 16 | return np.log(1 / (1 / data - 1)) 17 | 18 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 19 | return 1 / (1 + np.exp(-data)) 20 | 21 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 22 | return np.log(data) 23 | 24 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 25 | return np.exp(data) 26 | 27 | @property 28 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 29 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 30 | assert self.size_factors is None, "size factors not allowed" 31 | return eta 32 | 33 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 34 | # Make sure that dimensionality of sliced array is kept: 35 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 36 | j = [j] 37 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 38 | assert self.size_factors is None, "size factors not allowed" 39 | eta = self.np_clip_param(eta, "eta_loc") 40 | return eta 41 | 42 | # Re-parameterizations: 43 | 44 | @property 45 | def mean(self) -> Union[np.ndarray, dask.array.core.Array]: 46 | return self.location 47 | 48 | @property 49 | def samplesize(self) -> Union[np.ndarray, dask.array.core.Array]: 50 | return self.scale 51 | 52 | @property 53 | def p(self) -> Union[np.ndarray, dask.array.core.Array]: 54 | return self.mean * self.samplesize 55 | 56 | @property 57 | def q(self) -> Union[np.ndarray, dask.array.core.Array]: 58 | return (1 - self.mean) * self.samplesize 59 | 60 | # parameter contraints: 61 | 62 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 63 | 64 | zero = np.nextafter(0, np.inf, dtype=dtype) 65 | one = np.nextafter(1, -np.inf, dtype=dtype) 66 | 67 | bounds_min = { 68 | "theta_location": np.log(zero / (1 - zero)) / sf, 69 | "theta_scale": np.log(zero) / sf, 70 | "eta_loc": np.log(zero / (1 - zero)) / sf, 71 | "eta_scale": np.log(zero) / sf, 72 | "mean": np.nextafter(0, np.inf, dtype=dtype), 73 | "samplesize": np.nextafter(0, np.inf, dtype=dtype), 74 | "probs": dtype(0), 75 | "log_probs": np.log(zero), 76 | } 77 | bounds_max = { 78 | "theta_location": np.log(one / (1 - one)) / sf, 79 | "theta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 80 | "eta_loc": np.log(one / (1 - one)) / sf, 81 | "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 82 | "mean": one, 83 | "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 84 | "probs": dtype(1), 85 | "log_probs": dtype(0), 86 | } 87 | 88 | return bounds_min, bounds_max 89 | 90 | # simulator: 91 | 92 | @property 93 | def rand_fn_ave(self) -> Optional[Callable]: 94 | return lambda shape: np.random.uniform(0.2, 0.8, shape) 95 | 96 | @property 97 | def rand_fn(self) -> Optional[Callable]: 98 | return None 99 | 100 | @property 101 | def rand_fn_loc(self) -> Optional[Callable]: 102 | return lambda shape: np.random.uniform(0.05, 0.15, shape) 103 | 104 | @property 105 | def rand_fn_scale(self) -> Optional[Callable]: 106 | return lambda shape: np.random.uniform(0.2, 0.5, shape) 107 | 108 | def generate_data(self): 109 | """ 110 | Sample random data based on beta distribution and parameters. 
111 | """ 112 | return np.random.beta(a=self.p, b=self.q, size=None) 113 | -------------------------------------------------------------------------------- /batchglm/models/glm_beta/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | import scipy.sparse 5 | 6 | from .external import closedform_glm_mean, closedform_glm_scale 7 | 8 | 9 | def closedform_beta_glm_logitmean( 10 | x: Union[np.ndarray, scipy.sparse.csr_matrix], 11 | design_loc: np.ndarray, 12 | constraints_loc, 13 | size_factors=None, 14 | link_fn=lambda x: np.log(1 / (1 / x - 1)), 15 | inv_link_fn=lambda x: 1 / (1 + np.exp(-x)), 16 | ): 17 | r""" 18 | Calculates a closed-form solution for the `mean` parameters of beta GLMs. 19 | 20 | :param x: The sample data 21 | :param design_loc: design matrix for location 22 | :param constraints_loc: tensor (all parameters x dependent parameters) 23 | Tensor that encodes how the complete parameter set, which includes dependent 24 | parameters, arises from the independent parameters: all = constraints @ independent. 25 | This form of constraints is used in vector generalized linear models (VGLMs). 26 | :param size_factors: size factors for X 27 | :return: tuple: (groupwise_means, mean, rmsd) 28 | """ 29 | return closedform_glm_mean( 30 | x=x, 31 | dmat=design_loc, 32 | constraints=constraints_loc, 33 | size_factors=size_factors, 34 | link_fn=link_fn, 35 | inv_link_fn=inv_link_fn, 36 | ) 37 | 38 | 39 | def closedform_beta_glm_logsamplesize( 40 | x: Union[np.ndarray, scipy.sparse.csr_matrix], 41 | design_scale: np.ndarray, 42 | constraints=None, 43 | size_factors=None, 44 | groupwise_means=None, 45 | link_fn=np.log, 46 | invlink_fn=np.exp, 47 | ): 48 | r""" 49 | Calculates a closed-form solution for the log-scale parameters of beta GLMs.
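Based on the method-of-moments identity samplesize = mean * (1 - mean) / variance - 1 (see compute_scales_fun below); e.g. for a Beta(3, 7) variable, mean = 0.3, variance = 21 / 1100, and 0.3 * 0.7 / variance - 1 = 10 = p + q.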
50 | 51 | :param x: The sample data 52 | :param design_scale: design matrix for scale 53 | :param constraints: constraint matrix for the scale model, passed through to closedform_glm_scale 54 | :param size_factors: size factors for X 55 | :param groupwise_means: optional; group-wise means that were already computed can be passed here to avoid recomputing them 56 | :return: tuple (groupwise_scales, logsamplesize, rmsd) 57 | """ 58 | 59 | def compute_scales_fun(variance, mean): 60 | groupwise_scales = mean * (1 - mean) / variance - 1 61 | return groupwise_scales 62 | 63 | return closedform_glm_scale( 64 | x=x, 65 | design_scale=design_scale, 66 | constraints=constraints, 67 | size_factors=size_factors, 68 | groupwise_means=groupwise_means, 69 | link_fn=link_fn, 70 | inv_link_fn=invlink_fn, 71 | compute_scales_fun=compute_scales_fun, 72 | ) 73 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask.array 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 11 | """ 12 | Generalized Linear Model (GLM) with negative binomial noise.
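Both location and scale use the natural log link. Under the (mu, phi) re-parameterization exposed below, the variance of an observation is mu + mu**2 / phi.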
13 | """ 14 | 15 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 16 | return np.log(data) 17 | 18 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 19 | return np.exp(data) 20 | 21 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 22 | return np.log(data) 23 | 24 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 25 | return np.exp(data) 26 | 27 | @property 28 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 29 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 30 | if self.size_factors is not None: 31 | eta += self.size_factors 32 | eta = self.np_clip_param(eta, "eta_loc") 33 | return eta 34 | 35 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 36 | # Make sure that dimensionality of sliced array is kept: 37 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 38 | j = [j] 39 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 40 | if self.size_factors is not None: 41 | eta += self.size_factors 42 | eta = self.np_clip_param(eta, "eta_loc") 43 | return eta 44 | 45 | # Re-parameterizations: 46 | 47 | @property 48 | def mu(self) -> Union[np.ndarray, dask.array.core.Array]: 49 | return self.location 50 | 51 | @property 52 | def phi(self) -> Union[np.ndarray, dask.array.core.Array]: 53 | return self.scale 54 | 55 | # param constraints: 56 | 57 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 58 | 59 | bounds_min = { 60 | "theta_location": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 61 | "theta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 62 | "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 63 | "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 64 | "loc": np.nextafter(0, np.inf, dtype=dtype), 65 | "scale": np.nextafter(0, np.inf, dtype=dtype), 66 | "likelihood": dtype(0), 67 | "ll": np.log(np.nextafter(0, np.inf, dtype=dtype)), 68 | } 69 | bounds_max = { 70 | "theta_location": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 71 | "theta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 72 | "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 73 | "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 74 | "loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 75 | "scale": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 76 | "likelihood": dtype(1), 77 | "ll": dtype(0), 78 | } 79 | return bounds_min, bounds_max 80 | 81 | # simulator: 82 | 83 | @property 84 | def rand_fn_ave(self) -> Optional[Callable]: 85 | return lambda shape: np.random.poisson(500, shape) + 1 86 | 87 | @property 88 | def rand_fn(self) -> Optional[Callable]: 89 | return lambda shape: np.abs(np.random.uniform(0.5, 2, shape)) 90 | 91 | @property 92 | def rand_fn_loc(self) -> Optional[Callable]: 93 | return None 94 | 95 | @property 96 | def rand_fn_scale(self) -> Optional[Callable]: 97 | return None 98 | 99 | def generate_data(self) -> np.ndarray: 100 | """ 101 | Sample random data based on negative binomial distribution and parameters. 
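numpy parameterizes the negative binomial by (n, p); with n = phi and p = 1 - mu / (phi + mu) = phi / (phi + mu), a draw has mean mu and variance mu + mu**2 / phi. A hedged empirical sketch (illustrative values only):

    import numpy as np

    mu, phi = 10.0, 2.0
    x = np.random.negative_binomial(n=phi, p=1 - mu / (phi + mu), size=100_000)
    # x.mean() is close to mu == 10.0 and x.var() is close to mu + mu**2 / phi == 60.0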
102 | """ 103 | return np.random.negative_binomial(n=self.phi, p=1 - self.mu / (self.phi + self.mu), size=None) 104 | -------------------------------------------------------------------------------- /batchglm/models/glm_nb/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Callable, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | import scipy.sparse 7 | 8 | from .external import closedform_glm_mean, closedform_glm_scale 9 | 10 | logger = logging.getLogger("batchglm") 11 | 12 | 13 | def closedform_nb_glm_logmu( 14 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 15 | design_loc: Union[np.ndarray, dask.array.core.Array], 16 | constraints_loc: Union[np.ndarray, dask.array.core.Array], 17 | size_factors: Optional[np.ndarray] = None, 18 | link_fn: Callable = np.log, 19 | inv_link_fn: Callable = np.exp, 20 | ): 21 | r""" 22 | Calculates a closed-form solution for the `mu` parameters of negative-binomial GLMs. 23 | 24 | :param x: The sample data 25 | :param design_loc: design matrix for location 26 | :param constraints_loc: tensor (all parameters x dependent parameters) 27 | Tensor that encodes how complete parameter set which includes dependent 28 | parameters arises from indepedent parameters: all = . 29 | This form of constraints is used in vector generalized linear models (VGLMs). 30 | :param size_factors: size factors for X 31 | :return: tuple: (groupwise_means, mu, rmsd) 32 | """ 33 | return closedform_glm_mean( 34 | x=x, 35 | dmat=design_loc, 36 | constraints=constraints_loc, 37 | size_factors=size_factors, 38 | link_fn=link_fn, 39 | inv_link_fn=inv_link_fn, 40 | ) 41 | 42 | 43 | def closedform_nb_glm_logphi( 44 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 45 | design_scale: Union[np.ndarray, dask.array.core.Array], 46 | constraints: Optional[Union[np.ndarray, dask.array.core.Array]] = None, 47 | size_factors: Optional[np.ndarray] = None, 48 | groupwise_means: Optional[np.ndarray] = None, 49 | link_fn: Callable = np.log, 50 | invlink_fn: Callable = np.exp, 51 | ): 52 | r""" 53 | Calculates a closed-form solution for the log-scale parameters of negative-binomial GLMs. 54 | Based on the Method-of-Moments estimator. 55 | 56 | :param x: The sample data 57 | :param design_scale: design matrix for scale 58 | :param constraints: some design constraints 59 | :param size_factors: size factors for X 60 | :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation 61 | :return: tuple (groupwise_scales, logphi, rmsd) 62 | """ 63 | 64 | def compute_scales_fun(variance, mean): 65 | denominator = np.fmax(variance - mean, np.sqrt(np.nextafter(0, 1, dtype=variance.dtype))) 66 | groupwise_scales = np.square(mean) / denominator 67 | return groupwise_scales 68 | 69 | return closedform_glm_scale( 70 | x=x, 71 | design_scale=design_scale, 72 | constraints=constraints, 73 | size_factors=size_factors, 74 | groupwise_means=groupwise_means, 75 | link_fn=link_fn, 76 | inv_link_fn=invlink_fn, 77 | compute_scales_fun=compute_scales_fun, 78 | ) 79 | 80 | 81 | def init_par(model, init_location: str, init_scale: str) -> Tuple[np.ndarray, np.ndarray, bool, bool]: 82 | r""" 83 | standard: 84 | Only initialise intercept and keep other coefficients as zero. 
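Concretely, the 'standard' branch below sets theta_location[0, :] = log(mean(x, axis=0)) and leaves the remaining rows at zero.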
85 | 86 | closed-form: 87 | Initialize with Maximum Likelihood / Maximum of Momentum estimators 88 | 89 | Idea: 90 | $$ 91 | \theta &= f(x) \\ 92 | \Rightarrow f^{-1}(\theta) &= x \\ 93 | &= (D \cdot D^{+}) \cdot x \\ 94 | &= D \cdot (D^{+} \cdot x) \\ 95 | &= D \cdot x' = f^{-1}(\theta) 96 | $$ 97 | """ 98 | train_loc = False 99 | 100 | def auto_loc(dmat: Union[np.ndarray, dask.array.core.Array]) -> str: 101 | """ 102 | Checks if dmat is one-hot encoded and returns 'closed_form' if so, else 'standard' 103 | 104 | :param dmat The design matrix to check. 105 | """ 106 | unique_params = np.unique(dmat) 107 | if isinstance(unique_params, dask.array.core.Array): 108 | unique_params = unique_params.compute() 109 | if len(unique_params) == 2 and unique_params[0] == 0.0 and unique_params[1] == 1.0: 110 | return "closed_form" 111 | logger.warning( 112 | ( 113 | "Cannot use 'closed_form' init for loc model: " 114 | "design_loc is not one-hot encoded. Falling back to standard initialization." 115 | ) 116 | ) 117 | return "standard" 118 | 119 | groupwise_means = None 120 | 121 | init_location_str = init_location.lower() 122 | # Chose option if auto was chosen 123 | if init_location_str == "auto": 124 | 125 | init_location_str = auto_loc(model.design_loc) 126 | 127 | if init_location_str == "closed_form": 128 | groupwise_means, init_theta_location, rmsd_a = closedform_nb_glm_logmu( 129 | x=model.x, 130 | design_loc=model.design_loc, 131 | constraints_loc=model.constraints_loc, 132 | size_factors=model.size_factors, 133 | link_fn=lambda mu: np.log(mu + np.nextafter(0, 1, dtype=mu.dtype)), 134 | ) 135 | # train mu, if the closed-form solution is inaccurate 136 | train_loc = not (np.all(np.abs(rmsd_a) < 1e-20) or rmsd_a.size == 0) 137 | if model.size_factors is not None: 138 | if np.any(model.size_factors != 1): 139 | train_loc = True 140 | 141 | elif init_location_str == "standard": 142 | overall_means = np.mean(model.x, axis=0) # directly calculate the mean 143 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 144 | init_theta_location[0, :] = np.log(overall_means) 145 | train_loc = True 146 | elif init_location_str == "all_zero": 147 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 148 | train_loc = True 149 | else: 150 | raise ValueError("init_location string %s not recognized" % init_location) 151 | 152 | init_scale_str = init_scale.lower() 153 | if init_scale_str == "auto": 154 | init_scale_str = "standard" 155 | 156 | if init_scale_str == "standard": 157 | groupwise_scales, init_scale_intercept, rmsd_b = closedform_nb_glm_logphi( 158 | x=model.x, 159 | design_scale=model.design_scale[:, [0]], 160 | constraints=model.constraints_scale[[0], :][:, [0]], 161 | size_factors=model.size_factors, 162 | groupwise_means=None, 163 | link_fn=lambda r: np.log(r + np.nextafter(0, 1, dtype=r.dtype)), 164 | ) 165 | init_theta_scale = np.zeros([model.num_scale_params, model.num_features]) 166 | init_theta_scale[0, :] = init_scale_intercept 167 | elif init_scale_str == "closed_form": 168 | if not np.array_equal(model.design_loc, model.design_scale): 169 | raise ValueError("Cannot use 'closed_form' init for scale model: design_scale != design_loc.") 170 | if init_location_str is not None and init_location_str != init_scale_str: 171 | raise ValueError( 172 | "Cannot use 'closed_form' init for scale model: init_location != 'closed_form' which is required." 
173 | ) 174 | 175 | groupwise_scales, init_theta_scale, rmsd_b = closedform_nb_glm_logphi( 176 | x=model.x, 177 | design_scale=model.design_scale, 178 | constraints=model.constraints_scale, 179 | size_factors=model.size_factors, 180 | groupwise_means=groupwise_means, 181 | link_fn=lambda r: np.log(r), 182 | ) 183 | elif init_scale_str == "all_zero": 184 | init_theta_scale = np.zeros([model.num_scale_params, model.x.shape[1]]) 185 | else: 186 | raise ValueError("init_scale string %s not recognized" % init_scale_str) 187 | 188 | return init_theta_location, init_theta_scale, train_loc, True 189 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 11 | 12 | """Generalized Linear Model (GLM) with normal noise.""" 13 | 14 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 15 | return data 16 | 17 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 18 | return data 19 | 20 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 21 | return np.log(data) 22 | 23 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 24 | return np.exp(data) 25 | 26 | @property 27 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 28 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 29 | if self.size_factors is not None: 30 | eta *= self.size_factors 31 | return eta 32 | 33 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 34 | # Make sure that dimensionality of sliced array is kept: 35 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 36 | j = [j] 37 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 38 | if self.size_factors is not None: 39 | eta *= self.size_factors 40 | eta = self.np_clip_param(eta, "eta_loc") 41 | return eta 42 | 43 | # Re-parameterizations: 44 | 45 | @property 46 | def mean(self) -> Union[np.ndarray, dask.array.core.Array]: 47 | return self.location 48 | 49 | @property 50 | def sd(self) -> Union[np.ndarray, dask.array.core.Array]: 51 | return self.scale 52 | 53 | # param constraints: 54 | 55 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 56 | 57 | bounds_min = { 58 | "theta_location": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, 59 | "theta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 60 | "eta_loc": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, 61 | "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 62 | "mean": 
np.nextafter(-dmax, np.inf, dtype=dtype) / sf, 63 | "sd": np.nextafter(0, np.inf, dtype=dtype), 64 | "probs": dtype(0), 65 | "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), 66 | } 67 | bounds_max = { 68 | "theta_location": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 69 | "theta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 70 | "eta_loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 71 | "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 72 | "mean": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 73 | "sd": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 74 | "probs": dtype(1), 75 | "log_probs": dtype(0), 76 | } 77 | return bounds_min, bounds_max 78 | 79 | # simulator: 80 | 81 | @property 82 | def rand_fn_ave(self) -> Optional[Callable]: 83 | return lambda shape: np.random.uniform(10, 1000, shape) 84 | 85 | @property 86 | def rand_fn(self) -> Optional[Callable]: 87 | return None 88 | 89 | @property 90 | def rand_fn_loc(self) -> Optional[Callable]: 91 | return lambda shape: np.random.uniform(50, 100, shape) 92 | 93 | @property 94 | def rand_fn_scale(self) -> Optional[Callable]: 95 | return lambda shape: np.random.uniform(1.5, 10, shape) 96 | 97 | def generate_data(self): 98 | """ 99 | Sample random data based on normal distribution and parameters. 100 | """ 101 | return np.random.normal(loc=self.mean, scale=self.sd, size=None) 102 | -------------------------------------------------------------------------------- /batchglm/models/glm_norm/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | import scipy.sparse 7 | 8 | from .external import closedform_glm_scale 9 | 10 | logger = logging.getLogger("batchglm") 11 | 12 | 13 | def closedform_norm_glm_logsd( 14 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 15 | design_scale: Union[np.ndarray, dask.array.core.Array], 16 | constraints=None, 17 | size_factors=None, 18 | groupwise_means=None, 19 | link_fn=np.log, 20 | ): 21 | r""" 22 | Calculates a closed-form solution for the log-scale parameters of normal GLMs. 23 | 24 | :param x: The sample data 25 | :param design_scale: design matrix for scale 26 | :param constraints: some design constraints 27 | :param size_factors: size factors for X 28 | :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation 29 | :return: tuple (groupwise_scales, logsd, rmsd) 30 | """ 31 | 32 | def compute_scales_fun(variance, mean): 33 | groupwise_scales = np.sqrt(variance) 34 | return groupwise_scales 35 | 36 | return closedform_glm_scale( 37 | x=x, 38 | design_scale=design_scale, 39 | constraints=constraints, 40 | size_factors=size_factors, 41 | groupwise_means=groupwise_means, 42 | link_fn=link_fn, 43 | compute_scales_fun=compute_scales_fun, 44 | ) 45 | 46 | 47 | def init_par(model, init_location: str, init_scale: str) -> Tuple[np.ndarray, np.ndarray, bool, bool]: 48 | r""" 49 | standard: 50 | Only initialise intercept and keep other coefficients as zero. 
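For the identity-linked normal location model, the 'standard' branch below sets the intercept to the observed feature means directly (no log transform).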
51 | 52 | closed-form: 53 | Initialize with Maximum Likelihood / Method of Moments estimators 54 | 55 | Idea: 56 | $$ 57 | \theta &= f(x) \\ 58 | \Rightarrow f^{-1}(\theta) &= x \\ 59 | &= (D \cdot D^{+}) \cdot x \\ 60 | &= D \cdot (D^{+} \cdot x) \\ 61 | &= D \cdot x' = f^{-1}(\theta) 62 | $$ 63 | """ 64 | groupwise_means = None 65 | 66 | init_location_str = init_location.lower() 67 | # Choose the concrete option if "auto" was chosen 68 | auto_or_closed_form = init_location_str == "auto" or init_location_str == "closed_form" 69 | if auto_or_closed_form or init_location_str == "all_zero": 70 | if auto_or_closed_form: 71 | logger.warning( 72 | ( 73 | "There is no need for closed-form location model initialization " 74 | "because it is already closed form - falling back to zeros." 75 | ) 76 | ) 77 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 78 | elif init_location_str == "standard": 79 | overall_means = np.mean(model.x, axis=0) # directly calculate the mean 80 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 81 | init_theta_location[0, :] = overall_means # identity linked. 82 | else: 83 | raise ValueError("init_location string %s not recognized" % init_location) 84 | 85 | init_scale_str = init_scale.lower() 86 | if init_scale_str == "auto": 87 | init_scale_str = "standard" 88 | 89 | if init_scale_str == "standard": 90 | groupwise_scales, init_scale_intercept, rmsd_b = closedform_norm_glm_logsd( 91 | x=model.x, 92 | design_scale=model.design_scale[:, [0]], 93 | constraints=model.constraints_scale[[0], :][:, [0]], 94 | size_factors=model.size_factors, 95 | groupwise_means=None, 96 | link_fn=lambda r: np.log(r + np.nextafter(0, 1, dtype=r.dtype)), 97 | ) 98 | init_theta_scale = np.zeros([model.num_scale_params, model.num_features]) 99 | init_theta_scale[0, :] = init_scale_intercept 100 | elif init_scale_str == "closed_form": 101 | groupwise_scales, init_theta_scale, rmsd_b = closedform_norm_glm_logsd( 102 | x=model.x, 103 | design_scale=model.design_scale, 104 | constraints=model.constraints_scale, 105 | size_factors=model.size_factors, 106 | groupwise_means=groupwise_means, 107 | ) 108 | elif init_scale_str == "all_zero": 109 | init_theta_scale = np.zeros([model.num_scale_params, model.x.shape[1]]) 110 | else: 111 | raise ValueError("init_scale string %s not recognized" % init_scale_str) 112 | 113 | return init_theta_location, init_theta_scale, True, True 114 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Model 2 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm import ModelGLM, closedform_glm_mean, closedform_glm_scale 4 | from batchglm.utils.linalg import groupwise_solve_lm 5 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Callable, Dict, Optional, Tuple, Union 3 | 4 | import dask.array 5 | import numpy as np 6 | 7 | from .external import ModelGLM 8 | 9 | 10 | class Model(ModelGLM, metaclass=abc.ABCMeta): 
11 | """ 12 | Generalized Linear Model (GLM) with Poisson noise. 13 | """ 14 | 15 | def link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 16 | return np.log(data) 17 | 18 | def inverse_link_loc(self, data) -> Union[np.ndarray, dask.array.core.Array]: 19 | return np.exp(data) 20 | 21 | def link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 22 | return np.log(data) 23 | 24 | def inverse_link_scale(self, data) -> Union[np.ndarray, dask.array.core.Array]: 25 | return np.exp(data) 26 | 27 | @property 28 | def eta_loc(self) -> Union[np.ndarray, dask.array.core.Array]: 29 | eta = np.matmul(self.design_loc, self.theta_location_constrained) 30 | if self.size_factors is not None: 31 | eta += self.size_factors 32 | eta = self.np_clip_param(eta, "eta_loc") 33 | return eta 34 | 35 | def eta_loc_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 36 | # Make sure that dimensionality of sliced array is kept: 37 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 38 | j = [j] 39 | eta = np.matmul(self.design_loc, self.theta_location_constrained[:, j]) 40 | if self.size_factors is not None: 41 | eta += self.size_factors 42 | eta = self.np_clip_param(eta, "eta_loc") 43 | return eta 44 | 45 | # Re-parameterizations: 46 | 47 | @property 48 | def lam(self) -> Union[np.ndarray, dask.array.core.Array]: 49 | return self.location 50 | 51 | # param constraints: 52 | 53 | def bounds(self, sf, dmax, dtype) -> Tuple[Dict[str, Any], Dict[str, Any]]: 54 | 55 | bounds_min = { 56 | "theta_location": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 57 | "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 58 | "loc": np.nextafter(0, np.inf, dtype=dtype), 59 | "scale": np.nextafter(0, np.inf, dtype=dtype), 60 | "likelihood": dtype(0), 61 | "ll": np.log(np.nextafter(0, np.inf, dtype=dtype)), 62 | # Not used and should be removed: https://github.com/theislab/batchglm/issues/148 63 | "theta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 64 | "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, 65 | } 66 | bounds_max = { 67 | "theta_location": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 68 | "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, 69 | "loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 70 | "scale": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, 71 | "likelihood": dtype(1), 72 | "ll": dtype(10000), # poisson models can have large log likelhoods initially 73 | # Not used and should be removed: https://github.com/theislab/batchglm/issues/148 74 | "theta_scale": np.log(dmax) / sf, 75 | "eta_scale": np.log(dmax) / sf, 76 | } 77 | return bounds_min, bounds_max 78 | 79 | # simulator: 80 | 81 | @property 82 | def rand_fn_ave(self) -> Optional[Callable]: 83 | return lambda shape: np.random.poisson(500, shape) + 1 84 | 85 | @property 86 | def rand_fn(self) -> Optional[Callable]: 87 | return lambda shape: np.abs(np.random.uniform(0.5, 2, shape)) 88 | 89 | @property 90 | def rand_fn_loc(self) -> Optional[Callable]: 91 | return None 92 | 93 | @property 94 | def rand_fn_scale(self) -> Optional[Callable]: 95 | return None 96 | 97 | def generate_data(self) -> np.ndarray: 98 | """ 99 | Sample random data based on poisson distribution and parameters. 
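lam is the inverse-log-linked location, i.e. lam = exp(eta_loc). A hedged sketch with made-up values (not part of the API):

    import numpy as np

    lam = np.exp(np.array([[0.0, 1.0], [2.0, 3.0]]))  # inverse link, as in self.lam
    x = np.random.poisson(lam=lam)  # elementwise draws with E[x] = Var[x] = lam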
100 | """ 101 | # see https://github.com/astronomyk/SimCADO/issues/59 for why we cast lam 102 | return np.random.poisson(lam=self.lam) 103 | -------------------------------------------------------------------------------- /batchglm/models/glm_poisson/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Callable, Optional, Tuple, Union 3 | 4 | import dask 5 | import numpy as np 6 | import scipy.sparse 7 | 8 | from .external import closedform_glm_mean 9 | 10 | logger = logging.getLogger("batchglm") 11 | 12 | 13 | def closedform_poisson_glm_loglam( 14 | x: Union[np.ndarray, scipy.sparse.csr_matrix, dask.array.core.Array], 15 | design_loc: Union[np.ndarray, dask.array.core.Array], 16 | constraints_loc: Union[np.ndarray, dask.array.core.Array], 17 | size_factors: Optional[np.ndarray] = None, 18 | link_fn: Callable = np.log, 19 | inv_link_fn: Callable = np.exp, 20 | ): 21 | r""" 22 | Calculates a closed-form solution for the `lam` parameters of poisson GLMs. 23 | 24 | :param x: The sample data 25 | :param design_loc: design matrix for location 26 | :param constraints_loc: tensor (all parameters x dependent parameters) 27 | Tensor that encodes how complete parameter set which includes dependent 28 | parameters arises from indepedent parameters: all = . 29 | This form of constraints is used in vector generalized linear models (VGLMs). 30 | :param size_factors: size factors for X 31 | :return: tuple: (groupwise_means, mu, rmsd) 32 | """ 33 | return closedform_glm_mean( 34 | x=x, 35 | dmat=design_loc, 36 | constraints=constraints_loc, 37 | size_factors=size_factors, 38 | link_fn=link_fn, 39 | inv_link_fn=inv_link_fn, 40 | ) 41 | 42 | 43 | def init_par(model, init_location: str) -> Tuple[np.ndarray, np.ndarray, bool, bool]: 44 | r""" 45 | standard: 46 | Only initialise intercept and keep other coefficients as zero. 47 | 48 | closed-form: 49 | Initialize with Maximum Likelihood / Maximum of Momentum estimators 50 | 51 | Idea: 52 | $$ 53 | \theta &= f(x) \\ 54 | \Rightarrow f^{-1}(\theta) &= x \\ 55 | &= (D \cdot D^{+}) \cdot x \\ 56 | &= D \cdot (D^{+} \cdot x) \\ 57 | &= D \cdot x' = f^{-1}(\theta) 58 | $$ 59 | """ 60 | train_loc = False 61 | 62 | def auto_loc(dmat: Union[np.ndarray, dask.array.core.Array]) -> str: 63 | """ 64 | Checks if dmat is one-hot encoded and returns 'closed_form' if so, else 'standard' 65 | 66 | :param dmat The design matrix to check. 67 | """ 68 | unique_params = np.unique(dmat) 69 | if isinstance(unique_params, dask.array.core.Array): 70 | unique_params = unique_params.compute() 71 | if len(unique_params) == 2 and unique_params[0] == 0.0 and unique_params[1] == 1.0: 72 | return "closed_form" 73 | logger.warning( 74 | ( 75 | "Cannot use 'closed_form' init for loc model: " 76 | "design_loc is not one-hot encoded. Falling back to standard initialization." 
77 | ) 78 | ) 79 | return "standard" 80 | 81 | groupwise_means = None 82 | 83 | init_location_str = init_location.lower() 84 | # Chose option if auto was chosen 85 | if init_location_str == "auto": 86 | 87 | init_location_str = auto_loc(model.design_loc) 88 | 89 | if init_location_str == "closed_form": 90 | groupwise_means, init_theta_location, rmsd_a = closedform_poisson_glm_loglam( 91 | x=model.x, 92 | design_loc=model.design_loc, 93 | constraints_loc=model.constraints_loc, 94 | size_factors=model.size_factors, 95 | link_fn=lambda lam: np.log(lam + np.nextafter(0, 1, dtype=lam.dtype)), 96 | ) 97 | # train mu, if the closed-form solution is inaccurate 98 | train_loc = not (np.all(np.abs(rmsd_a) < 1e-20) or rmsd_a.size == 0) 99 | if model.size_factors is not None: 100 | if np.any(model.size_factors != 1): 101 | train_loc = True 102 | 103 | elif init_location_str == "standard": 104 | overall_means = np.mean(model.x, axis=0) # directly calculate the mean 105 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 106 | init_theta_location[0, :] = np.log(overall_means) 107 | train_loc = True 108 | elif init_location_str == "all_zero": 109 | init_theta_location = np.zeros([model.num_loc_params, model.num_features]) 110 | train_loc = True 111 | else: 112 | raise ValueError("init_location string %s not recognized" % init_location) 113 | 114 | # Scale is not used so just return init_theta_location for what would be init_theta_scale 115 | return init_theta_location, init_theta_location, train_loc, True 116 | -------------------------------------------------------------------------------- /batchglm/pkg_constants.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | 4 | ACCURACY_MARGIN_RELATIVE_TO_LIMIT = float(os.environ.get("BATCHGLM_ACCURACY_MARGIN", 2.5)) 5 | FIM_MODE = str(os.environ.get("FIM_MODE", "analytic")) 6 | HESSIAN_MODE = str(os.environ.get("HESSIAN_MODE", "analytic")) 7 | JACOBIAN_MODE = str(os.environ.get("JACOBIAN_MODE", "analytic")) 8 | CHOLESKY_LSTSQS = False 9 | CHOLESKY_LSTSQS_BATCHED = False 10 | EVAL_ON_BATCHED = False 11 | 12 | # Trust region hyper parameters: 13 | TRUST_REGION_RADIUS_INIT = 100.0 14 | TRUST_REGION_ETA0 = 0.0 15 | TRUST_REGION_ETA1 = 0.25 16 | TRUST_REGION_ETA2 = 0.25 17 | TRUST_REGION_T1 = 0.5 # Fast collapse to avoid trailing. 18 | TRUST_REGION_T2 = 1.5 # Allow expansion if not shrinking. 19 | TRUST_REGION_UPPER_BOUND = 1e5 20 | 21 | TRUST_REGIONT_T1_IRLS_GD_TR_SCALE = 1 22 | 23 | # Convergence hyper-parameters: 24 | LLTOL_BY_FEATURE = 1e-10 25 | XTOL_BY_FEATURE_LOC = 1e-8 26 | XTOL_BY_FEATURE_SCALE = 1e-6 27 | GTOL_BY_FEATURE_LOC = 1e-8 28 | GTOL_BY_FEATURE_SCALE = 1e-8 29 | -------------------------------------------------------------------------------- /batchglm/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/batchglm/b893fd0ce020669ff38583e4ec135b10926093ae/batchglm/py.typed -------------------------------------------------------------------------------- /batchglm/train/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import numpy 2 | -------------------------------------------------------------------------------- /batchglm/train/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import BaseEstimatorGlm 2 | from .model_container import BaseModelContainer 3 | -------------------------------------------------------------------------------- /batchglm/train/base/estimator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class BaseEstimatorGlm(metaclass=abc.ABCMeta): 5 | @abc.abstractmethod 6 | def model_container(self): 7 | pass 8 | 9 | @abc.abstractmethod 10 | def initialize(self): 11 | pass 12 | 13 | @abc.abstractmethod 14 | def train_sequence(self): 15 | pass 16 | 17 | @abc.abstractmethod 18 | def finalize(self): 19 | pass 20 | -------------------------------------------------------------------------------- /batchglm/train/base/model_container.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from ...models.base_glm import ModelGLM 4 | 5 | 6 | class BaseModelContainer(metaclass=abc.ABCMeta): 7 | @abc.abstractmethod 8 | def error_codes(self): 9 | pass 10 | 11 | @abc.abstractmethod 12 | def niter(self): 13 | pass 14 | 15 | @abc.abstractmethod 16 | def ll(self): 17 | pass 18 | 19 | @abc.abstractmethod 20 | def jac(self): 21 | pass 22 | 23 | @abc.abstractmethod 24 | def hessian(self): 25 | pass 26 | 27 | @abc.abstractmethod 28 | def fisher_inv(self): 29 | pass 30 | 31 | @property 32 | @abc.abstractmethod 33 | def theta_location(self): 34 | pass 35 | 36 | @property 37 | @abc.abstractmethod 38 | def model(self) -> ModelGLM: 39 | pass 40 | 41 | def theta_location_constrained(self): 42 | pass 43 | 44 | def theta_scale_constrained(self): 45 | pass 46 | -------------------------------------------------------------------------------- /batchglm/train/numpy/__init__.py: -------------------------------------------------------------------------------- 1 | from . import glm_nb, glm_poisson 2 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import EstimatorGlm 2 | from .model_container import NumpyModelContainer 3 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/external.py: -------------------------------------------------------------------------------- 1 | from batchglm import pkg_constants 2 | from batchglm.models.base_glm import InputDataGLM, ModelGLM 3 | from batchglm.train.base import BaseEstimatorGlm, BaseModelContainer 4 | from batchglm.utils.data import dask_compute 5 | from batchglm.utils.linalg import groupwise_solve_lm 6 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/model_container.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union 3 | 4 | import dask.array 5 | import numpy as np 6 | 7 | from .external import BaseModelContainer, ModelGLM, dask_compute 8 | 9 | 10 | class NumpyModelContainer(BaseModelContainer): 11 | """ 12 | Build variables to be optimized. 
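params stacks the location block on top of the scale block along axis 0; npar_location marks the split, which the getters below use to slice params[0 : npar_location] (location) and params[npar_location :] (scale).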
13 | 14 | Attributes 15 | ---------- 16 | theta_location : np.ndarray 17 | Location model parameters 18 | theta_scale : np.ndarray 19 | Scale model parameters 20 | converged : np.ndarray 21 | Whether or not each feature's parameters have converged 22 | params : Union[np.ndarray, dask.array.core.Array] 23 | Model parameters 26 | idx_train_loc : np.ndarray 27 | Training indices for location model 28 | idx_train_scale : np.ndarray 29 | Training indices for scale model 30 | npar_location : int 31 | number of location parameters 32 | dtype : str 33 | data type to be used 34 | """ 35 | 36 | params: Union[np.ndarray, dask.array.core.Array] 37 | converged: np.ndarray 38 | npar_location: int 39 | dtype: str 40 | 41 | def __init__( 42 | self, 43 | model: ModelGLM, 44 | init_theta_location: Union[np.ndarray, dask.array.core.Array], 45 | init_theta_scale: Union[np.ndarray, dask.array.core.Array], 46 | chunk_size_genes: int, 47 | dtype: str, 48 | ): 49 | """ 50 | :param init_theta_location: 51 | Initialisation for all parameters of the mean model. (mean model size x features) 52 | :param init_theta_scale: 53 | Initialisation for all parameters of the dispersion model. (dispersion model size x features) 54 | :param chunk_size_genes: 55 | chunk size for dask 56 | :param dtype: 57 | Numerical precision to use. 58 | """ 59 | 60 | self._model = model 61 | init_theta_location_clipped = model.np_clip_param( 62 | np.asarray(init_theta_location, dtype=dtype), "theta_location" 63 | ) 64 | init_theta_scale_clipped = model.np_clip_param(np.asarray(init_theta_scale, dtype=dtype), "theta_scale") 65 | self.params = dask.array.from_array( 66 | np.concatenate( 67 | [ 68 | init_theta_location_clipped, 69 | init_theta_scale_clipped, 70 | ], 71 | axis=0, 72 | ), 73 | chunks=(1000, chunk_size_genes), 74 | ) 75 | self.npar_location = init_theta_location_clipped.shape[0] 76 | 77 | # Properties to follow gene-wise convergence. 78 | self.converged = np.repeat(a=False, repeats=self.params.shape[1]) # Initialise to non-converged. 79 | 80 | self.dtype = dtype 81 | self.idx_train_loc = np.arange(0, init_theta_location.shape[0]) 82 | self.idx_train_scale = np.arange( 83 | init_theta_location.shape[0], init_theta_location.shape[0] + init_theta_scale.shape[0] 84 | ) 85 | 86 | # Override the model's location and scale parameter getters so that they reference this container's params. 87 | self._model._theta_location_getter = self._theta_location_getter 88 | self._model._theta_scale_getter = self._theta_scale_getter 89 | 90 | # Is this actually used in diffxpy? Why? 91 | @property 92 | def niter(self): 93 | return None 94 | 95 | # Is this actually used in diffxpy? Why? 
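# niter and error_codes are inert placeholders for downstream callers (e.g. diffxpy);
# this container tracks neither an iteration count nor per-feature error codes.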
96 | @property 97 | def error_codes(self): 98 | return np.array([]) 99 | 100 | @property 101 | def model(self) -> ModelGLM: 102 | return self._model 103 | 104 | @property 105 | def fisher_inv(self) -> np.ndarray: 106 | return self._fisher_inv 107 | 108 | def _theta_location_getter(self) -> dask.array.core.Array: 109 | theta_location = self.params[0 : self.npar_location] 110 | return self.np_clip_param(theta_location, "theta_location") 111 | 112 | def _theta_scale_getter(self) -> dask.array.core.Array: 113 | theta_scale = self.params[self.npar_location :] 114 | return self.np_clip_param(theta_scale, "theta_scale") 115 | 116 | def __getattr__(self, attr: str): 117 | if attr.startswith("__") and attr.endswith("__"): 118 | raise AttributeError() 119 | return self.model.__getattribute__(attr) 120 | 121 | @property 122 | def idx_not_converged(self) -> np.ndarray: 123 | """Find which features are not converged""" 124 | return np.where(np.logical_not(self.converged))[0] 125 | 126 | @property 127 | def theta_location(self) -> dask.array.core.Array: 128 | """Location parameters""" 129 | return self._theta_location_getter() 130 | 131 | @theta_location.setter 132 | def theta_location(self, value: Union[np.ndarray, dask.array.core.Array]): 133 | # Threshold new entry: 134 | value = self.np_clip_param(value, "theta_location") 135 | # Write either new dask array or into numpy array: 136 | if isinstance(self.params, dask.array.core.Array): 137 | temp = self.params.compute() 138 | temp[0 : self.npar_location] = value 139 | self.params = dask.array.from_array(temp, chunks=self.params.chunksize) 140 | else: 141 | self.params[0 : self.npar_location] = value 142 | 143 | @property 144 | def theta_scale(self) -> dask.array.core.Array: 145 | """Scale parameters""" 146 | return self._theta_scale_getter() 147 | 148 | @theta_scale.setter 149 | def theta_scale(self, value: Union[np.ndarray, dask.array.core.Array]): 150 | # Threshold new entry: 151 | value = self.np_clip_param(value, "theta_scale") 152 | # Write either new dask array or into numpy array: 153 | if isinstance(self.params, dask.array.core.Array): 154 | temp = self.params.compute() 155 | temp[self.npar_location :] = value 156 | self.params = dask.array.from_array(temp, chunks=self.params.chunksize) 157 | else: 158 | self.params[self.npar_location :] = value 159 | 160 | @property 161 | def theta_location_constrained(self) -> Union[np.ndarray, dask.array.core.Array]: 162 | """dot product of location constraints with location parameter giving new constrained parameters""" 163 | return np.dot(self.constraints_loc, self.theta_location) 164 | 165 | @property 166 | def theta_scale_constrained(self) -> Union[np.ndarray, dask.array.core.Array]: 167 | """dot product of scale constraints with scale parameter giving new constrained parameters""" 168 | return np.dot(self.constraints_scale, self.theta_scale) 169 | 170 | def theta_scale_j(self, j: Union[int, np.ndarray]) -> dask.array.core.Array: 171 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 172 | j = np.full(1, j) 173 | return self.np_clip_param(self.params[self.npar_location :, j], "theta_scale") 174 | 175 | def theta_scale_j_setter(self, value: Union[np.ndarray, dask.array.core.Array], j: Union[int, np.ndarray]): 176 | """Setter for a specific theta_scale value.""" 177 | # Threshold new entry: 178 | value = self.np_clip_param(value, "theta_scale") 179 | # Write either new dask array or into numpy array: 180 | if isinstance(self.params, dask.array.core.Array): 181 | temp = 
self.params.compute() 182 | temp[self.npar_location :, j] = value 183 | self.params = dask.array.from_array(temp, chunks=self.params.chunksize) 184 | else: 185 | self.params[self.npar_location :, j] = value 186 | 187 | # jacobians 188 | 189 | @abc.abstractmethod 190 | def jac_weight(self) -> Union[np.ndarray, dask.array.core.Array]: 191 | pass 192 | 193 | @abc.abstractmethod 194 | def jac_weight_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 195 | pass 196 | 197 | @property 198 | def jac(self) -> Union[np.ndarray, dask.array.core.Array]: 199 | return np.concatenate([self.jac_location, self.jac_scale], axis=-1) 200 | 201 | @property 202 | def jac_location(self) -> Union[np.ndarray, dask.array.core.Array]: 203 | """ 204 | Location jacobian. 205 | :return: (features x inferred param) 206 | """ 207 | w = self.fim_weight_location_location # (observations x features) 208 | ybar = self.ybar # (observations x features) 209 | xh = self.xh_loc # (observations x inferred param) 210 | inner = np.einsum("ob,of->fob", xh, w) 211 | return np.einsum("fob,of->fb", inner, ybar) 212 | 213 | def jac_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 214 | """ 215 | Location jacobian indexed by j, the dependent variable of interest. 216 | :return: (features x inferred param) 217 | """ 218 | # Make sure that dimensionality of sliced array is kept: 219 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 220 | j = np.full(1, j) 221 | w = self.fim_weight_location_location_j(j=j) # (observations x features) 222 | ybar = self.ybar_j(j=j) # (observations x features) 223 | xh = self.xh_loc # (observations x inferred param) 224 | return np.einsum("fob,of->fb", np.einsum("ob,of->fob", xh, w), ybar) 225 | 226 | @property 227 | def jac_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 228 | """ 229 | 230 | :return: (features x inferred param) 231 | """ 232 | w = self.jac_weight_scale # (observations x features) 233 | xh = self.xh_scale # (observations x inferred param) 234 | return w.transpose() @ xh 235 | 236 | @dask_compute 237 | def jac_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 238 | """ 239 | 240 | :return: (features x inferred param) 241 | """ 242 | # Make sure that dimensionality of sliced array is kept: 243 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 244 | j = np.full(1, j) 245 | w = self.jac_weight_scale_j(j=j) # (observations x features) 246 | xh = self.xh_scale # (observations x inferred param) 247 | return w.transpose() @ xh 248 | 249 | @abc.abstractmethod 250 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 251 | pass 252 | 253 | # hessians 254 | 255 | @property 256 | @abc.abstractmethod 257 | def hessian_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 258 | pass 259 | 260 | @property 261 | def hessian_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 262 | """ 263 | :return: (features x inferred param x inferred param) 264 | """ 265 | w = self.hessian_weight_location_location 266 | xh = self.xh_loc 267 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh) 268 | 269 | @property 270 | @abc.abstractmethod 271 | def hessian_weight_location_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 272 | pass 273 | 274 | @property 275 | def hessian_location_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 276 | 
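# Contraction pattern used throughout this class: the inner einsum "ob,of->fob"
# scales the design columns by the per-feature weights, and the outer einsum
# "fob,oc->fbc" sums over observations, i.e. per feature f it computes
# xh_loc.T @ diag(w[:, f]) @ xh_scale.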
""" 277 | 278 | :return: (features x inferred param x inferred param) 279 | """ 280 | w = self.hessian_weight_location_scale 281 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", self.xh_loc, w), self.xh_scale) 282 | 283 | @property 284 | @abc.abstractmethod 285 | def hessian_weight_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 286 | pass 287 | 288 | @property 289 | def hessian_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 290 | """ 291 | 292 | :return: (features x inferred param x inferred param) 293 | """ 294 | w = self.hessian_weight_scale_scale 295 | xh = self.xh_scale 296 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh) 297 | 298 | @property 299 | def hessian(self) -> Union[np.ndarray, dask.array.core.Array]: 300 | """ 301 | 302 | :return: (features x inferred param x inferred param) 303 | """ 304 | h_aa = self.hessian_location_location 305 | h_bb = self.hessian_scale_scale 306 | h_ab = self.hessian_location_scale 307 | h_ba = np.transpose(h_ab, axes=[0, 2, 1]) 308 | return np.concatenate([np.concatenate([h_aa, h_ab], axis=2), np.concatenate([h_ba, h_bb], axis=2)], axis=1) 309 | 310 | # fim 311 | 312 | @abc.abstractmethod 313 | def fim_weight_location_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 314 | pass 315 | 316 | @property 317 | def fim_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 318 | """ 319 | Location-location coefficient block of FIM 320 | 321 | :return: (features x inferred param x inferred param) 322 | """ 323 | w = self.fim_weight_location_location # (observations x features) 324 | # constraints: (observed param x inferred param) 325 | # design: (observations x observed param) 326 | # w: (observations x features) 327 | # fim: (features x inferred param x inferred param) 328 | xh = self.xh_loc 329 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh) 330 | 331 | @property 332 | @abc.abstractmethod 333 | def fim_location_scale(self) -> np.ndarray: 334 | pass 335 | 336 | @property 337 | def fim_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 338 | pass 339 | 340 | @property 341 | def fim(self) -> Union[np.ndarray, dask.array.core.Array]: 342 | """ 343 | Full FIM 344 | 345 | :return: (features x inferred param x inferred param) 346 | """ 347 | fim_location_location = self.fim_location_location 348 | fim_scale_scale = self.fim_scale_scale 349 | fim_location_scale = self.fim_location_scale 350 | fim_ba = np.transpose(fim_location_scale, axes=[0, 2, 1]) 351 | return np.concatenate( 352 | [ 353 | np.concatenate([fim_location_location, fim_location_scale], axis=2), 354 | np.concatenate([fim_ba, fim_scale_scale], axis=2), 355 | ], 356 | axis=1, 357 | ) 358 | 359 | @abc.abstractmethod 360 | def fim_weight(self) -> Union[np.ndarray, dask.array.core.Array]: 361 | pass 362 | 363 | @property 364 | @abc.abstractmethod 365 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 366 | """ 367 | This is exactly W in (11) and in equation (7) as well and will be used as such in the 368 | calculation of the Jacobian. 
369 | """ 370 | pass 371 | 372 | @property 373 | @abc.abstractmethod 374 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]: 375 | pass 376 | 377 | @abc.abstractmethod 378 | def ll_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 379 | pass 380 | 381 | @property # type: ignore 382 | @dask_compute 383 | def ll_byfeature(self) -> np.ndarray: 384 | return np.sum(self.ll, axis=0) 385 | 386 | @dask_compute 387 | def ll_byfeature_j(self, j: Union[int, np.ndarray]) -> np.ndarray: 388 | return np.sum(self.ll_j(j=j), axis=0) 389 | 390 | @property 391 | @abc.abstractmethod 392 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]: 393 | """ 394 | This is Z in equation (8). 395 | """ 396 | pass 397 | 398 | @abc.abstractmethod 399 | def ybar_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 400 | """ 401 | This is Z in equation (8) indexed by j i.e the dependent variable of interest. 402 | """ 403 | pass 404 | -------------------------------------------------------------------------------- /batchglm/train/numpy/base_glm/training_strategies.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TrainingStrategies(Enum): 5 | 6 | AUTO = None 7 | DEFAULT = [ 8 | { 9 | "max_steps": 1000, 10 | "method_scale": "brent", 11 | "update_scale_freq": 5, 12 | "ftol_scale": 1e-6, 13 | "max_iter_scale": 1000, 14 | }, 15 | ] 16 | GD = [ 17 | {"max_steps": 1000, "method_scale": "gd", "update_scale_freq": 5, "ftol_scale": 1e-6, "max_iter_scale": 100}, 18 | ] 19 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import Estimator 2 | from .model_container import ModelContainer 3 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/estimator.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Optional, Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from .external import EstimatorGlm, Model, init_par 7 | from .model_container import ModelContainer 8 | 9 | 10 | class Estimator(EstimatorGlm): 11 | """ 12 | Estimator for Generalized Linear Models (GLMs) with negative binomial noise. 13 | Uses the natural logarithm as linker function. 14 | 15 | Attributes 16 | ---------- 17 | model_vars : ModelVars 18 | model variables 19 | """ 20 | 21 | def __init__( 22 | self, 23 | model: Model, 24 | init_location: str = "AUTO", 25 | init_scale: str = "AUTO", 26 | # batch_size: Optional[Union[Tuple[int, int], int]] = None, 27 | quick_scale: bool = False, 28 | dtype: str = "float64", 29 | ): 30 | """ 31 | Performs initialisation and creates a new estimator. 32 | 33 | :param init_location: (Optional) 34 | Low-level initial values for a. Can be: 35 | 36 | - str: 37 | * "auto": automatically choose best initialization 38 | * "random": initialize with random values 39 | * "standard": initialize intercept with observed mean 40 | * "init_model": initialize with another model (see `ìnit_model` parameter) 41 | * "closed_form": try to initialize with closed form 42 | - np.ndarray: direct initialization of 'a' 43 | :param init_scale: (Optional) 44 | Low-level initial values for b. 
Can be: 45 | 46 | - str: 47 | * "auto": automatically choose best initialization 48 | * "random": initialize with random values 49 | * "standard": initialize with zeros 50 | * "init_model": initialize with another model (see `ìnit_model` parameter) 51 | * "closed_form": try to initialize with closed form 52 | - np.ndarray: direct initialization of 'b' 53 | :param quick_scale: bool 54 | Whether `scale` will be fitted faster and maybe less accurate. 55 | Useful in scenarios where fitting the exact `scale` is not absolutely necessary. 56 | :param dtype: Numerical precision. 57 | """ 58 | init_theta_location, init_theta_scale, train_loc, train_scale = init_par( 59 | model=model, init_location=init_location, init_scale=init_scale 60 | ) 61 | self._train_loc = train_loc 62 | self._train_scale = train_scale 63 | if quick_scale: 64 | self._train_scale = False 65 | sys.stdout.write("training location model: %s\n" % str(self._train_loc)) 66 | sys.stdout.write("training scale model: %s\n" % str(self._train_scale)) 67 | init_theta_location = init_theta_location.astype(dtype) 68 | init_theta_scale = init_theta_scale.astype(dtype) 69 | 70 | _model_container = ModelContainer( 71 | model=model, 72 | init_theta_location=init_theta_location, 73 | init_theta_scale=init_theta_scale, 74 | chunk_size_genes=model.chunk_size_genes, 75 | dtype=dtype, 76 | ) 77 | super(Estimator, self).__init__(model_container=_model_container, dtype=dtype) 78 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/external.py: -------------------------------------------------------------------------------- 1 | import batchglm.utils.data as data_utils 2 | from batchglm import pkg_constants 3 | from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale 4 | from batchglm.models.glm_nb.model import Model 5 | from batchglm.models.glm_nb.utils import init_par 6 | 7 | # import necessary base_glm layers 8 | from batchglm.train.numpy.base_glm import EstimatorGlm, NumpyModelContainer 9 | from batchglm.utils.data import dask_compute 10 | from batchglm.utils.linalg import groupwise_solve_lm 11 | -------------------------------------------------------------------------------- /batchglm/train/numpy/glm_nb/model_container.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Union 2 | 3 | import dask 4 | import numpy as np 5 | import scipy 6 | 7 | from .external import NumpyModelContainer, dask_compute 8 | 9 | 10 | class ModelContainer(NumpyModelContainer): 11 | @property 12 | def fim_weight(self): 13 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.") 14 | 15 | @property 16 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]: 17 | """ 18 | Fisher inverse matrix weights 19 | :return: observations x features 20 | """ 21 | return self.location * self.scale / (self.scale + self.location) 22 | 23 | def fim_weight_location_location_j(self, j) -> Union[np.ndarray, dask.array.core.Array]: 24 | """ 25 | Fisher inverse matrix weights at j 26 | :return: observations x features 27 | """ 28 | return self.location_j(j=j) * self.scale_j(j=j) / (self.scale_j(j=j) + self.location_j(j=j)) 29 | 30 | @property 31 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]: 32 | """ 33 | :return: observations x features 34 | """ 35 | return np.asarray(self.x - self.location) / self.location 36 | 37 | def ybar_j(self, j: 
Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 38 | """ 39 | :return: observations x features 40 | """ 41 | # Make sure that dimensionality of sliced array is kept: 42 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 43 | j = np.full(1, j) 44 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array): 45 | return (self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j) 46 | else: 47 | return np.asarray(self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j) 48 | 49 | @property 50 | def jac_weight(self): 51 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.") 52 | 53 | @property 54 | def jac_weight_j(self): 55 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.") 56 | 57 | @property 58 | def jac_weight_scale(self) -> Union[np.ndarray, dask.array.core.Array]: 59 | """ 60 | Scale model jacobian 61 | :return: observations x features 62 | """ 63 | scale = self.scale 64 | loc = self.location 65 | if isinstance(self.x, scipy.sparse.csr_matrix): 66 | scale_plus_x = np.asarray(scale + self.x) 67 | else: 68 | scale_plus_x = scale + self.x 69 | r_plus_mu = scale + loc 70 | 71 | # Define graphs for individual terms of constant term of hessian: 72 | const1 = scipy.special.digamma(scale_plus_x) - scipy.special.digamma(scale) 73 | const2 = -scale_plus_x / r_plus_mu 74 | const3 = np.log(scale) + np.ones_like(scale) - np.log(r_plus_mu) 75 | return scale * (const1 + const2 + const3) 76 | 77 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]: 78 | """ 79 | Scale model jacobian at location j 80 | :param j: Location 81 | :return: observations x features 82 | """ 83 | # Make sure that dimensionality of sliced array is kept: 84 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64): 85 | j = np.full(1, j) 86 | scale = self.scale_j(j=j) 87 | loc = self.location_j(j=j) 88 | if isinstance(self.x, scipy.sparse.csr_matrix): 89 | scale_plus_x = np.asarray(scale + self.x[:, j]) 90 | else: 91 | scale_plus_x = scale + self.x[:, j] 92 | r_plus_mu = scale + loc 93 | 94 | # Define graphs for individual terms of constant term of hessian: 95 | const1 = scipy.special.digamma(scale_plus_x) - scipy.special.digamma(scale) 96 | const2 = -scale_plus_x / r_plus_mu 97 | const3 = np.log(scale) + np.ones_like(scale) - np.log(r_plus_mu) 98 | return scale * (const1 + const2 + const3) 99 | 100 | @property 101 | def fim_location_scale(self) -> np.ndarray: 102 | """ 103 | Location-scale coefficient block of FIM 104 | 105 | The negative binomial model is not fit as whole with IRLS but only the location model. 106 | The location model is conditioned on the scale model estimates, which is why we only 107 | supply the FIM of the location model and return an empty FIM for scale model components. 108 | Note that there is also no closed form FIM for the scale-scale block. Returning a zero-array 109 | here leads to singular matrices for the whole location-scale FIM in some cases that throw 110 | linear algebra errors when inverted. 
111 |
112 | :return: (features x inferred param x inferred param)
113 | """
114 | return np.zeros([self.theta_scale.shape[1], 0, 0])
115 |
116 | @property
117 | def fim_scale_scale(self) -> np.ndarray:
118 | """
119 | Scale-scale coefficient block of FIM
120 |
121 | The negative binomial model is not fit as a whole with IRLS; only the location model is.
122 | The location model is conditioned on the scale model estimates, which is why we only
123 | supply the FIM of the location model and return an empty FIM for the scale model components.
124 | Note that there is also no closed-form FIM for the scale-scale block. Returning a zero-filled
125 | array here would in some cases make the whole location-scale FIM singular and throw
126 | linear algebra errors when inverted.
127 |
128 | :return: (features x inferred param x inferred param)
129 | """
130 | return np.zeros([self.theta_scale.shape[1], 0, 0])
131 |
132 | @property
133 | def hessian_weight_location_scale(self) -> np.ndarray:
134 | """scale-location block of the hessian matrix"""
135 | scale = self.scale
136 | loc = self.location
137 | return np.multiply(loc * scale, np.asarray(self.x - loc) / np.square(loc + scale))
138 |
139 | @property
140 | def hessian_weight_location_location(self) -> np.ndarray:
141 | """location-location block of the hessian matrix"""
142 | scale = self.scale
143 | loc = self.location
144 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
145 | x_by_scale_plus_one = self.x / scale + np.ones_like(scale)
146 | else:
147 | x_by_scale_plus_one = np.asarray(self.x.divide(scale) + np.ones_like(scale))
148 |
149 | return -loc * x_by_scale_plus_one / np.square((loc / scale) + np.ones_like(loc))
150 |
151 | @property
152 | def hessian_weight_scale_scale(self) -> np.ndarray:
153 | """scale-scale block of the hessian matrix"""
154 | scale = self.scale
155 | loc = self.location
156 | scale_plus_x = np.asarray(self.x + scale)
157 | scale_plus_loc = scale + loc
158 | # Define the individual terms of the constant part of the hessian:
159 | const1 = scipy.special.digamma(scale_plus_x) + scale * scipy.special.polygamma(n=1, x=scale_plus_x)
160 | const2 = -scipy.special.digamma(scale) + scale * scipy.special.polygamma(n=1, x=scale)
161 | const3 = -loc * scale_plus_x + np.ones_like(scale) * 2.0 * scale * scale_plus_loc / np.square(scale_plus_loc)
162 | const4 = np.log(scale) + np.ones_like(scale) * 2.0 - np.log(scale_plus_loc)
163 | return scale * (const1 + const2 + const3 + const4)
164 |
165 | @property
166 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]:
167 | """log-likelihood"""
168 | scale = self.scale
169 | loc = self.location
170 | log_r_plus_mu = np.log(scale + loc)
171 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
172 | # dense numpy or dask
173 | ll = (
174 | scipy.special.gammaln(scale + self.x)
175 | - scipy.special.gammaln(self.x + np.ones_like(scale))
176 | - scipy.special.gammaln(scale)
177 | + self.x * (self.eta_loc - log_r_plus_mu)
178 | + np.multiply(scale, self.eta_scale - log_r_plus_mu)
179 | )
180 | else:
181 | # sparse scipy
182 | ll = (
183 | scipy.special.gammaln(np.asarray(scale + self.x))
184 | - scipy.special.gammaln(self.x + np.ones_like(scale))
185 | - scipy.special.gammaln(scale)
186 | + np.asarray(
187 | self.x.multiply(self.eta_loc - log_r_plus_mu) + np.multiply(scale, self.eta_scale - log_r_plus_mu)
188 | )
189 | )
190 | ll = np.asarray(ll)
191 | return self.np_clip_param(ll, "ll")
192 |
193 | def ll_j(self, j: Union[int,
np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
194 | """
195 | Log likelihood for feature j
196 | :param j: Feature index
197 | """
198 | # Make sure that dimensionality of sliced array is kept:
199 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
200 | j = np.full(1, j)
201 | scale = self.scale_j(j=j)
202 | loc = self.location_j(j=j)
203 | log_r_plus_mu = np.log(scale + loc)
204 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
205 | # dense numpy or dask
206 | ll = (
207 | scipy.special.gammaln(scale + self.x[:, j])
208 | - scipy.special.gammaln(self.x[:, j] + np.ones_like(scale))
209 | - scipy.special.gammaln(scale)
210 | + self.x[:, j] * (self.eta_loc_j(j=j) - log_r_plus_mu)
211 | + np.multiply(scale, self.eta_scale_j(j=j) - log_r_plus_mu)
212 | )
213 | else:
214 | # sparse scipy
215 | ll = (
216 | scipy.special.gammaln(np.asarray(scale + self.x[:, j]))
217 | - scipy.special.gammaln(self.x[:, j] + np.ones_like(scale))  # slice column j, as in the dense branch
218 | - scipy.special.gammaln(scale)
219 | + np.asarray(
220 | self.x[:, j].multiply(self.eta_loc_j(j=j) - log_r_plus_mu)
221 | + np.multiply(scale, self.eta_scale_j(j=j) - log_r_plus_mu)
222 | )
223 | )
224 | ll = np.asarray(ll)
225 | return self.np_clip_param(ll, "ll")
226 |
227 | def ll_handle(self) -> Callable:
228 | def fun(x, eta_loc, theta_scale, xh_scale):
229 | eta_scale = np.matmul(xh_scale, theta_scale)
230 | scale = np.exp(eta_scale)
231 | loc = np.exp(eta_loc)
232 | log_r_plus_mu = np.log(scale + loc)
233 | if isinstance(x, np.ndarray) or isinstance(x, dask.array.core.Array):
234 | # dense numpy or dask
235 | ll = (
236 | scipy.special.gammaln(scale + x)
237 | - scipy.special.gammaln(x + np.ones_like(scale))
238 | - scipy.special.gammaln(scale)
239 | + x * (eta_loc - log_r_plus_mu)
240 | + np.multiply(scale, eta_scale - log_r_plus_mu)
241 | )
242 | else:
243 | raise ValueError("type x %s not supported" % type(x))
244 | return self.np_clip_param(ll, "ll")
245 |
246 | return fun
247 |
248 | def jac_scale_handle(self) -> Callable:
249 | def fun(x, eta_loc, theta_scale, xh_scale):
250 | scale = np.exp(np.matmul(xh_scale, theta_scale))  # map scale coefficients through the design, mirroring ll_handle
251 | loc = np.exp(eta_loc)
252 | scale_plus_x = scale + x
253 | r_plus_mu = scale + loc
254 |
255 | # Define the individual terms of the constant part of the jacobian:
256 | const1 = scipy.special.digamma(scale_plus_x) - scipy.special.digamma(scale)
257 | const2 = -scale_plus_x / r_plus_mu
258 | const3 = np.log(scale) + np.ones_like(scale) - np.log(r_plus_mu)
259 | return scale * (const1 + const2 + const3)
260 |
261 | return fun
262 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/__init__.py: --------------------------------------------------------------------------------
1 | from .estimator import Estimator
2 | from .model_container import ModelContainer
3 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/estimator.py: --------------------------------------------------------------------------------
1 | import logging
2 |
3 | import numpy as np
4 |
5 | from .external import EstimatorGlm, Model, init_par
6 | from .model_container import ModelContainer
7 |
8 | logger = logging.getLogger("batchglm")
9 |
10 |
11 | class Estimator(EstimatorGlm):
12 | def __init__(
13 | self,
14 | model: Model,
15 | init_location: str = "AUTO",
16 | init_scale: str = "AUTO",
17 | # batch_size: Optional[Union[Tuple[int, int], int]] = None,
18 | quick_scale: bool = False,
19 | dtype:
str = "float64",
20 | ):
21 | """
22 | Performs initialisation and creates a new estimator.
23 | :param model:
24 | The GLM model to be fit
25 | :param init_location: (Optional)
26 | Low-level initial values for a. Can be:
27 |
28 | - str:
29 | * "auto": automatically choose best initialization
30 | * "standard": initialize intercept with observed mean
31 | * "closed_form": try to initialize with closed form
32 | - np.ndarray: direct initialization of 'a'
33 | :param init_scale: (Optional)
34 | Low-level initial values for b. Can be:
35 |
36 | - str:
37 | * "auto": automatically choose best initialization
38 | * "random": initialize with random values
39 | * "standard": initialize with zeros
40 | * "closed_form": try to initialize with closed form
41 | - np.ndarray: direct initialization of 'b'
42 | :param quick_scale: bool
43 | Whether `scale` will be fitted faster, possibly at the cost of accuracy.
44 | Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
45 | :param dtype: Numerical precision.
46 | """
47 | init_theta_location, init_theta_scale, train_loc, train_scale = init_par(
48 | model=model, init_location=init_location, init_scale=init_scale
49 | )
50 | init_theta_location = init_theta_location.astype(dtype)
51 | init_theta_scale = init_theta_scale.astype(dtype)
52 | self._train_scale = train_scale
53 | self._train_loc = train_loc
54 | if quick_scale:
55 | self._train_scale = False
56 | _model_container = ModelContainer(
57 | model=model,
58 | init_theta_location=init_theta_location,
59 | init_theta_scale=init_theta_scale,
60 | chunk_size_genes=model.chunk_size_genes,
61 | dtype=dtype,
62 | )
63 | super(Estimator, self).__init__(model_container=_model_container, dtype=dtype)
64 |
65 | def train(
66 | self,
67 | **kwargs,
68 | ):
69 | model = self._model_container.model
70 | if self._train_loc:
71 | theta_location, _, _, _ = np.linalg.lstsq(model.design_loc, model.x, rcond=None)
72 | self._model_container.theta_location = theta_location
73 | self._train_loc = False
74 | super().train(**kwargs)
75 | self._train_loc = True
76 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/external.py: --------------------------------------------------------------------------------
1 | import batchglm.utils.data as data_utils
2 | from batchglm import pkg_constants
3 | from batchglm.models.base_glm.utils import closedform_glm_scale
4 | from batchglm.models.glm_norm.model import Model
5 | from batchglm.models.glm_norm.utils import closedform_norm_glm_logsd, init_par
6 |
7 | # import necessary base_glm layers
8 | from batchglm.train.numpy.base_glm import EstimatorGlm, NumpyModelContainer
9 | from batchglm.utils.linalg import groupwise_solve_lm
10 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/model_container.py: --------------------------------------------------------------------------------
1 | import math
2 | from typing import Callable, Union
3 |
4 | import dask
5 | import numpy as np
6 |
7 | from .external import NumpyModelContainer
8 |
9 |
10 | def ll(scale, loc, x):
11 | resid = loc - x
12 | ll = -0.5 * np.log(2 * math.pi) - np.log(scale) - 0.5 * np.power(resid / scale, 2)
13 | return ll
14 |
15 |
16 | class ModelContainer(NumpyModelContainer):
17 | @property
18 | def fim_weight(self):
19 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
20 |
21 | @property
22 | def jac_weight(self):
23 | raise
NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
24 |
25 | @property
26 | def jac_weight_j(self):
27 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
28 |
29 | @property
30 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]:
31 | """
32 | :return: observations x features
33 | """
34 | return np.asarray(self.x - self.location)
35 |
36 | def ybar_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
37 | """
38 | :return: observations x features
39 | """
40 | # Make sure that dimensionality of sliced array is kept:
41 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
42 | j = np.full(1, j)
43 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
44 | return self.x[:, j] - self.location_j(j=j)  # plain residuals, consistent with `ybar` above
45 | else:
46 | return np.asarray(self.x[:, j] - self.location_j(j=j))
47 |
48 | @property
49 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]:
50 | return 1 / np.power(self.scale, 2)
51 |
52 | def fim_weight_location_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
53 | return 1 / (self.scale_j(j=j) * self.scale_j(j=j))
54 |
55 | @property
56 | def jac_weight_scale(self) -> Union[np.ndarray, dask.array.core.Array]:
57 | return -np.ones_like(self.x) - np.power((self.x - self.location) / self.scale, 2)
58 |
59 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
60 | return -np.ones_like(self.x[:, j]) - np.power((self.x[:, j] - self.location_j(j=j)) / self.scale_j(j=j), 2)
61 |
62 | @property
63 | def fim_location_scale(self) -> np.ndarray:
64 | return np.zeros([self.model.x.shape[1], self.theta_location.shape[0], self.theta_scale.shape[0]])
65 |
66 | @property
67 | def fim_weight_scale_scale(self) -> np.ndarray:
68 | return np.full(self.scale.shape, 2)
69 |
70 | @property
71 | def fim_scale_scale(self) -> Union[np.ndarray, dask.array.core.Array]:
72 | """
73 |
74 | :return: (features x inferred param x inferred param)
75 | """
76 | w = self.fim_weight_scale_scale
77 | xh = self.xh_scale
78 | return np.einsum("fob,oc->fbc", np.einsum("ob,of->fob", xh, w), xh)
79 |
80 | @property
81 | def hessian_weight_location_scale(self) -> np.ndarray:
82 | scale = self.scale
83 | loc = self.location
84 | return (2 / np.power(scale, 2)) * (self.x - loc)
85 |
86 | @property
87 | def hessian_weight_location_location(self) -> np.ndarray:
88 | scale = self.scale
89 | return -1 / np.power(scale, 2)
90 |
91 | @property
92 | def hessian_weight_scale_scale(self) -> np.ndarray:
93 | scale = self.scale
94 | loc = self.location
95 | return (2 / np.power(scale, 2)) * np.power(self.x - loc, 2)
96 |
97 | @property
98 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]:
99 | loc = self.location
100 | scale = self.scale
101 | x = self.model.x
102 | return np.asarray(ll(scale, loc, x))
103 |
104 | def ll_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
105 | # Make sure that dimensionality of sliced array is kept:
106 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
107 | j = np.full(1, j)
108 |
109 | loc = self.location_j(j=j)
110 | scale = self.scale_j(j=j)
111 | resid = loc - self.model.x[:, j]
112 | ll = -0.5 * np.log(2 * math.pi) - np.log(scale) - 0.5 * np.power(resid / scale, 2)
113 | return ll
114 |
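# Note on `ll_handle` below: it returns a standalone closure that recomputes the
# log-likelihood from raw inputs via the model's inverse link functions, so the
# likelihood can be re-evaluated for candidate scale coefficients without mutating
# this container. A minimal usage sketch (variable names are illustrative only,
# assuming a populated container `mc`):
#
#   fn = mc.ll_handle()
#   ll_new = fn(mc.x, mc.eta_loc, mc.theta_scale, mc.xh_scale)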
115 | def ll_handle(self) -> Callable:
116 | def fun(x, eta_loc, theta_scale, xh_scale):
117 | eta_scale = np.matmul(xh_scale, theta_scale)
118 | scale = self.model.inverse_link_scale(eta_scale)
119 | loc = self.model.inverse_link_loc(eta_loc)
120 | return ll(scale, loc, x)
121 |
122 | return fun
123 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_norm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/batchglm/b893fd0ce020669ff38583e4ec135b10926093ae/batchglm/train/numpy/glm_norm/utils.py --------------------------------------------------------------------------------
/batchglm/train/numpy/glm_poisson/__init__.py: --------------------------------------------------------------------------------
1 | from .estimator import Estimator
2 | from .model_container import ModelContainer
3 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/estimator.py: --------------------------------------------------------------------------------
1 | import sys
2 | from typing import Optional, Tuple, Union
3 |
4 | import numpy as np
5 |
6 | from .external import EstimatorGlm, Model, init_par
7 | from .model_container import ModelContainer
8 |
9 |
10 | class Estimator(EstimatorGlm):
11 | """
12 | Estimator for Generalized Linear Models (GLMs) with Poisson noise.
13 | Uses the natural logarithm as link function.
14 |
15 | Attributes
16 | ----------
17 | model_vars : ModelVars
18 | model variables
19 | """
20 |
21 | def __init__(
22 | self,
23 | model: Model,
24 | init_location: str = "AUTO",
25 | init_scale: str = "AUTO",
26 | # batch_size: Optional[Union[Tuple[int, int], int]] = None,
27 | quick_scale: bool = False,
28 | dtype: str = "float64",
29 | ):
30 | """
31 | Performs initialisation and creates a new estimator.
32 |
33 | :param init_location: (Optional)
34 | Low-level initial values for a. Can be:
35 |
36 | - str:
37 | * "auto": automatically choose best initialization
38 | * "standard": initialize intercept with observed mean
39 | * "init_model": initialize with another model (see `init_model` parameter)
40 | * "closed_form": try to initialize with closed form
41 | - np.ndarray: direct initialization of 'a'
42 | :param dtype: Numerical precision.
43 | """
44 | init_theta_location, _, train_loc, _ = init_par(model=model, init_location=init_location)
45 | self._train_loc = train_loc
46 | # no need to train the scale parameter for the Poisson model since the distribution has a single parameter
47 | self._train_scale = False
48 | sys.stdout.write("training location model: %s\n" % str(self._train_loc))
49 | init_theta_location = init_theta_location.astype(dtype)
50 |
51 | _model_container = ModelContainer(
52 | model=model,
53 | init_theta_location=init_theta_location,
54 | init_theta_scale=init_theta_location,  # Not used.
55 | chunk_size_genes=model.chunk_size_genes,
56 | dtype=dtype,
57 | )
58 | super(Estimator, self).__init__(model_container=_model_container, dtype=dtype)
59 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/exceptions.py: --------------------------------------------------------------------------------
1 | class NoScaleError(Exception):
2 | """
3 | Exception raised for attempting to access the scale parameter (or one of its derived methods) of a Poisson model.
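For example, accessing ``fim_location_scale`` or calling ``jac_weight_scale_j`` on a Poisson model container raises this error.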
4 | """
5 |
6 | def __init__(self, method: str):
7 | self.message = f"Attempted to access {method}. No scale parameter is fit for the Poisson model - please use location."
8 | super().__init__(self.message)
9 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/external.py: --------------------------------------------------------------------------------
1 | import batchglm.utils.data as data_utils
2 | from batchglm import pkg_constants
3 | from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale
4 | from batchglm.models.glm_poisson.model import Model
5 | from batchglm.models.glm_poisson.utils import init_par
6 |
7 | # import necessary base_glm layers
8 | from batchglm.train.numpy.base_glm import EstimatorGlm, NumpyModelContainer
9 | from batchglm.utils.linalg import groupwise_solve_lm
10 |
-------------------------------------------------------------------------------- /batchglm/train/numpy/glm_poisson/model_container.py: --------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import dask
4 | import numpy as np
5 | import scipy
6 |
7 | from .exceptions import NoScaleError
8 | from .external import NumpyModelContainer
9 |
10 |
11 | class ModelContainer(NumpyModelContainer):
12 | @property
13 | def fim_weight_location_location(self) -> Union[np.ndarray, dask.array.core.Array]:
14 | """
15 | Fisher information matrix weights
16 | :return: observations x features
17 | """
18 | return -self.hessian_weight_location_location
19 |
20 | @property
21 | def ybar(self) -> Union[np.ndarray, dask.array.core.Array]:
22 | """
23 | :return: observations x features
24 | """
25 | return np.asarray(self.x - self.location) / self.location
26 |
27 | def fim_weight_location_location_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
28 | """
29 | Fisher information matrix weights at feature j
30 | :return: observations x features
31 | """
32 | return self.location_j(j=j)
33 |
34 | def ybar_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
35 | """
36 | :return: observations x features
37 | """
38 | # Make sure that dimensionality of sliced array is kept:
39 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
40 | j = np.full(1, j)
41 | if isinstance(self.x, np.ndarray) or isinstance(self.x, dask.array.core.Array):
42 | return (self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j)
43 | else:
44 | return np.asarray(self.x[:, j] - self.location_j(j=j)) / self.location_j(j=j)
45 |
46 | @property
47 | def hessian_weight_location_location(self) -> np.ndarray:
48 | """location-location block of the hessian matrix"""
49 | return -self.location
50 |
51 | @property
52 | def ll(self) -> Union[np.ndarray, dask.array.core.Array]:
53 | """log-likelihood"""
54 | loc = self.location
55 | log_loc = np.log(loc)
56 | x_times_log_loc = self.x * log_loc
57 | log_x_factorial = scipy.special.gammaln(self.x + np.ones_like(self.x))  # gammaln(x + 1) = log(x!)
58 | ll = x_times_log_loc - loc - log_x_factorial
59 | return np.asarray(self.np_clip_param(ll, "ll"))
60 |
61 | def ll_j(self, j: Union[int, np.ndarray]) -> Union[np.ndarray, dask.array.core.Array]:
62 | """
63 | Log likelihood for feature j
64 | :param j: Feature index
65 | """
66 | # Make sure that dimensionality of sliced array is kept:
67 | if isinstance(j, int) or isinstance(j, np.int32) or isinstance(j, np.int64):
68 | j = np.full(1, j)
69 | loc_j = self.location_j(j=j)
70 |
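# Poisson log-likelihood per observation and feature: ll = x * log(mu) - mu - log(x!), where log(x!) = gammaln(x + 1):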
log_loc = np.log(loc_j)
71 | x_times_log_loc = self.x[:, j] * log_loc
72 | log_x_factorial = scipy.special.gammaln(self.x[:, j] + np.ones_like(self.x[:, j]))  # gammaln(x + 1) = log(x!)
73 | ll = x_times_log_loc - loc_j - log_x_factorial
74 | return np.asarray(self.np_clip_param(ll, "ll"))
75 |
76 | @property
77 | def hessian(self) -> Union[np.ndarray, dask.array.core.Array]:
78 | return self.hessian_location_location
79 |
80 | @property
81 | def fim_weight(self):
82 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
83 |
84 | @property
85 | def jac_weight(self):
86 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
87 |
88 | def jac_weight_j(self, j: Union[int, np.ndarray]):
89 | raise NotImplementedError("This method is currently unimplemented as it isn't used by any built-in procedures.")
90 |
91 | # Methods marked as abstract that involve the scale parameter:
92 | @property
93 | def fim_location_scale(self):
94 | raise NoScaleError("fim_location_scale")
95 |
96 | @property
97 | def hessian_weight_scale_scale(self):
98 | raise NoScaleError("hessian_weight_scale_scale")
99 |
100 | @property
101 | def hessian_weight_location_scale(self):
102 | raise NoScaleError("hessian_weight_location_scale")
103 |
104 | def jac_weight_scale_j(self, j: Union[int, np.ndarray]):
105 | raise NoScaleError("jac_weight_scale_j")
106 |
107 | @property
108 | def fim(self) -> Union[np.ndarray, dask.array.core.Array]:
109 | return self.fim_location_location
110 |
111 | @property
112 | def jac(self) -> Union[np.ndarray, dask.array.core.Array]:
113 | return self.jac_location
114 |
-------------------------------------------------------------------------------- /batchglm/utils/__init__.py: --------------------------------------------------------------------------------
1 | from . import data, input, plotting
2 |
-------------------------------------------------------------------------------- /batchglm/utils/linalg.py: --------------------------------------------------------------------------------
1 | import logging
2 | from typing import Callable, Union
3 |
4 | import dask.array
5 | import numpy as np
6 |
7 | logger = logging.getLogger("batchglm")
8 |
9 |
10 | def stacked_lstsq(L: Union[np.ndarray, dask.array.core.Array], b: np.ndarray, rcond: float = 1e-10):
11 | r"""
12 | Solve `Lx = b` via SVD least squares, cutting off small singular values
13 |
14 | :param L: tensor of shape (..., M, K)
15 | :param b: tensor of shape (..., M, N).
16 | :param rcond: relative threshold below which singular values
17 | are cut off (treated as zero in the pseudo-inverse)
18 | :return: x of shape (..., K, N)
19 | """
20 | u, s, v = np.linalg.svd(L, full_matrices=False)
21 | s_max = s.max(axis=-1, keepdims=True)
22 | s_min = rcond * s_max
23 |
24 | inv_s = np.reciprocal(s, out=np.zeros_like(s), where=s >= s_min)
25 |
26 | x = np.einsum("...MK,...MN->...KN", v, np.einsum("...K,...MK,...MN->...KN", inv_s, u, b))
27 |
28 | # rank = np.sum(s > rcond)
29 |
30 | return np.conj(x, out=x)
31 |
32 |
33 | def groupwise_solve_lm(
34 | dmat: Union[np.ndarray, dask.array.core.Array],
35 | apply_fun: Callable,
36 | constraints: Union[np.ndarray, dask.array.core.Array],
37 | ):
38 | r"""
39 | Solve GLMs by estimating the distribution parameters of each unique group of observations independently and
40 | then solving for the design matrix `dmat`.
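Each unique row of `dmat` defines one group of observations; the group-wise estimates are then mapped back onto the model coefficients via a small least-squares solve, as sketched below.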
41 |
42 | Idea:
43 | $$
44 | \theta &= f(x) \\
45 | \Rightarrow f^{-1}(\theta) &= x \\
46 | &= (D \cdot D^{+}) \cdot x \\
47 | &= D \cdot (D^{+} \cdot x) \\
48 | &= D \cdot x' = f^{-1}(\theta)
49 | $$
50 |
51 | :param dmat: design matrix which should be solved for
52 | :param apply_fun: some callable function taking one grouping vector argument.
53 | Should compute a group-wise parameter solution.
54 |
55 | Example method calculating group-wise means:
56 | ::
57 | def apply_fun(grouping):
58 | groupwise_means = data.groupby(grouping).mean(dim="observations").values
59 |
60 | return np.log(groupwise_means)
61 |
62 | The `grouping` argument passed to `apply_fun` assigns each observation to one unique row of `dmat`.
63 | :param constraints: tensor (all parameters x dependent parameters)
64 | Tensor that encodes how the complete parameter set, which includes dependent
65 | parameters, arises from the independent parameters: all = constraints @ indep.
66 | This form of constraints is used in vector generalized linear models (VGLMs).
67 |
68 | :return: tuple of (apply_fun(grouping), x_prime, rmsd, rank, s) where x_prime is the parameter matrix solved for
69 | `dmat`.
70 | """
71 | # Get unique rows of design matrix and vector with group assignments:
72 | if isinstance(dmat, dask.array.core.Array):  # axis argument not supported by dask in .unique()
73 | unique_design, inverse_idx = np.unique(dmat.compute(), axis=0, return_inverse=True)
74 | unique_design = dask.array.from_array(unique_design, chunks=unique_design.shape)
75 | else:
76 | unique_design, inverse_idx = np.unique(dmat, axis=0, return_inverse=True)
77 | if unique_design.shape[0] > 500:
78 | raise ValueError("large least-squares problem in init, likely defined a numeric predictor as categorical")
79 |
80 | full_rank = constraints.shape[1]
81 | unique_constrained_dmat = np.matmul(unique_design, constraints)
82 | if isinstance(unique_constrained_dmat, dask.array.core.Array):  # matrix_rank not supported by dask
83 | rank = np.linalg.matrix_rank(unique_constrained_dmat.compute())
84 | else:
85 | rank = np.linalg.matrix_rank(unique_constrained_dmat)
86 | if full_rank > rank:
87 | logger.error("model is not full rank!")
88 |
89 | # Get group-wise means in linker space based on group assignments
90 | # based on unique rows of design matrix:
91 | params = apply_fun(inverse_idx)
92 |
93 | # Use least-squares solver to compute model parameterization
94 | # accounting for dependent parameters, i.e. degrees of freedom
95 | # of the model which appear as groups in the design matrix
96 | # and are not accounted for by parameters but which are
97 | # accounted for by constraints:
98 | # (unique_design @ constraints) @ theta = means  ->  least-squares solve for theta
99 | # (This is faster and more accurate than using matrix inversion.)
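# Shape sketch, assuming G unique design rows (groups), P dependent parameters
# and F features: unique_constrained_dmat is (G, P) and params is (G, F), so the
# least-squares solution x_prime below has shape (P, F).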
100 | logger.debug(" ** Solve lstsq problem")
101 | if np.any(np.isnan(params)):
102 | raise ValueError("some entries of params are nan; np.linalg.lstsq would fail")
103 | x_prime, rmsd, rank, s = np.linalg.lstsq(unique_constrained_dmat, params, rcond=None)
104 |
105 | return params, x_prime, rmsd, rank, s
106 |
-------------------------------------------------------------------------------- /batchglm/utils/plotting.py: --------------------------------------------------------------------------------
1 | import logging
2 | from typing import Optional, Tuple, Union
3 |
4 | import dask.array
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import pandas as pd
8 | import seaborn as sns
9 | from matplotlib import gridspec, rcParams
10 | from matplotlib.axes import Axes
11 |
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | def _input_checks(
16 | true_values: Union[np.ndarray, dask.array.core.Array], pred_values: Union[np.ndarray, dask.array.core.Array]
17 | ):
18 | """
19 | Check the type of true and predicted input and make sure they have the same size.
20 |
21 | :param true_values: The reference parameters.
22 | :param pred_values: The fitted parameters.
23 | """
24 |
25 | def _cast(data: Union[np.ndarray, dask.array.core.Array]) -> np.ndarray:
26 | if isinstance(data, dask.array.core.Array):
27 | to_return = data.compute()
28 | elif isinstance(data, np.ndarray):
29 | to_return = data
30 | else:
31 | raise TypeError(f"Type {type(data)} is not recognized for true/pred values.")
32 | return to_return
33 |
34 | true_vals = _cast(true_values)
35 | pred_vals = _cast(pred_values)
36 |
37 | assert len(true_vals.shape) == len(pred_vals.shape), "true_values must have same dimensions as pred_values"
38 | assert np.all(true_vals.shape == pred_vals.shape), "true_values must have same dimensions as pred_values"
39 |
40 | return true_vals, pred_vals
41 |
42 |
43 | def plot_coef_vs_ref(
44 | true_values: Union[np.ndarray, dask.array.core.Array],
45 | pred_values: Union[np.ndarray, dask.array.core.Array],
46 | size=1,
47 | log=False,
48 | save=None,
49 | show=True,
50 | ncols=5,
51 | row_gap=0.3,
52 | col_gap=0.25,
53 | title: str = "",
54 | return_axs: bool = False,
55 | ) -> Optional[Axes]:
56 | """
57 | Plot estimated coefficients against reference (true) coefficients for location model.
58 |
59 | :param true_values: The reference (true) parameters, shaped parameters x features.
60 | :param size: Point size.
61 | :param save: Path+file name stem to save plots to.
62 | File will be save+"_genes.png". Does not save if save is None.
63 | :param show: Whether to display plot.
64 | :param ncols: Number of columns in plot grid if multiple genes are plotted.
65 | :param row_gap: Vertical gap between panel rows relative to panel height.
66 | :param col_gap: Horizontal gap between panel columns relative to panel width.
67 | :param title: Plot title.
68 | :param return_axs: Whether to return axis objects.
69 | :return: Matplotlib axis objects.
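:param pred_values: The fitted parameters; must have the same shape as true_values.
:param log: Whether to log-transform both coordinates (np.log(x + 1)) before plotting.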
70 | """
71 | true_values, pred_values = _input_checks(true_values, pred_values)
72 |
73 | plt.ioff()
74 |
75 | n_par = true_values.shape[0]
76 | ncols = ncols if n_par > ncols else n_par
77 | nrows = n_par // ncols + int(n_par % ncols > 0)  # ceil(n_par / ncols)
78 |
79 | gs = gridspec.GridSpec(nrows=nrows, ncols=ncols, hspace=row_gap, wspace=col_gap)
80 |
81 | fig = plt.figure(
82 | figsize=(
83 | ncols * rcParams["figure.figsize"][0],  # width in inches
84 | nrows * rcParams["figure.figsize"][1] * (1 + row_gap),  # height in inches
85 | )
86 | )
87 |
88 | if title is None:
89 | title = "parameter"
90 |
91 | # Build axis objects in loop.
92 | axs = []
93 | for i in range(n_par):
94 | ax = plt.subplot(gs[i])
95 | axs.append(ax)
96 |
97 | x = true_values[i, :]
98 | y = pred_values[i, :]
99 | if log:
100 | x = np.log(x + 1)
101 | y = np.log(y + 1)
102 |
103 | sns.scatterplot(x=x, y=y, size=size, ax=ax, legend=False)
104 | sns.lineplot(
105 | x=np.array([np.min([np.min(x), np.min(y)]), np.max([np.max(x), np.max(y)])]),
106 | y=np.array([np.min([np.min(x), np.min(y)]), np.max([np.max(x), np.max(y)])]),
107 | ax=ax,
108 | )
109 |
110 | title_i = title + "_" + str(i)
111 | # Add correlation into title:
112 | title_i = title_i + " (R=" + str(np.round(np.corrcoef(x, y)[0, 1], 3)) + ")"
113 | ax.set_title(title_i)
114 | ax.set_xlabel("true parameter")
115 | ax.set_ylabel("estimated parameter")
116 |
117 | # Save, show and return figure.
118 | if save is not None:
119 | plt.savefig(save + "_parameter_scatter.png")
120 |
121 | if show:
122 | plt.show()
123 |
124 | plt.close(fig)
125 | plt.ion()
126 |
127 | if return_axs:
128 | return axs
129 | return None
130 |
131 |
132 | def plot_deviation(
133 | true_values: np.ndarray, pred_values: np.ndarray, save=None, show=True, return_axs=False, title: str = ""
134 | ) -> Optional[Axes]:
135 | """
136 | Plot deviation of estimated coefficients from reference (true) coefficients
137 | as violin plot for location model.
138 |
139 | :param true_values: The reference (true) parameters, shaped parameters x features.
140 | :param pred_values: The fitted parameters; must have the same shape as true_values.
141 | :param save: Path+file name stem to save plots to.
142 | File will be save+"_genes.png". Does not save if save is None.
143 | :param show: Whether to display plot.
144 | :param return_axs: Whether to return axis objects.
145 | :param title: Title.
146 | :return: Matplotlib axis objects.
147 | """
148 | true_values, pred_values = _input_checks(true_values, pred_values)
149 |
150 | plt.ioff()
151 |
152 | n_par = true_values.shape[0]
153 | summary_fit = pd.concat(
154 | [
155 | pd.DataFrame(
156 | {
157 | "deviation": pred_values[i, :] - true_values[i, :],
158 | "coefficient": pd.Series(["coef_" + str(i) for x in range(pred_values.shape[1])], dtype="category"),
159 | }
160 | )
161 | for i in range(n_par)
162 | ]
163 | )
164 | summary_fit["coefficient"] = summary_fit["coefficient"].astype("category")
165 |
166 | fig, ax = plt.subplots()
167 | sns.violinplot(x=summary_fit["coefficient"], y=summary_fit["deviation"], ax=ax)
168 |
169 | if title is not None:
170 | ax.set_title(title)
171 |
172 | # Save, show and return figure.
173 | if save is not None: 174 | plt.savefig(save + "_deviation_violin.png") 175 | 176 | if show: 177 | plt.show() 178 | 179 | plt.close(fig) 180 | plt.ion() 181 | 182 | if return_axs: 183 | return ax 184 | return None 185 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | target: auto 7 | patch: 8 | default: 9 | target: auto 10 | -------------------------------------------------------------------------------- /cookietemple.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.7.4 3 | 4 | [bumpversion_files_whitelisted] 5 | init_file = batchglm/__init__.py 6 | dot_cookietemple = .cookietemple.yml 7 | conf_py = docs/conf.py 8 | main_file = batchglm/__main__.py 9 | 10 | [bumpversion_files_blacklisted] 11 | poetry = pyproject.toml 12 | release_drafter_config = .github/release-drafter.yml 13 | 14 | [sync] 15 | sync_enabled = True 16 | 17 | [sync_level] 18 | ct_sync_level = minor 19 | 20 | [sync_files_blacklisted] 21 | changelog = CHANGELOG.rst 22 | poetry_lock = poetry.lock 23 | poetry = pyproject.toml 24 | tests = tests/**/* 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = batchglm 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
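# For example, `make html` forwards the `html` target to sphinx-build's make mode.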
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
-------------------------------------------------------------------------------- /docs/_static/css/custom.css: --------------------------------------------------------------------------------
1 | .small {
2 | font-size: 40%;
3 | }
4 |
5 | .rst-content dl:not(.docutils) dl dt {
6 | /* mimic numpydoc’s blockquote style */
7 | font-weight: normal;
8 | background: none transparent;
9 | border-left: none;
10 | margin: 0 0 12px;
11 | padding: 3px 0 0;
12 | font-size: 100%;
13 | }
14 |
15 | .rst-content dl:not(.docutils) dl dt code {
16 | font-size: 100%;
17 | font-weight: normal;
18 | background: none transparent;
19 | border: none;
20 | padding: 0 2px;
21 | }
22 |
23 | .rst-content dl:not(.docutils) dl dt a.reference > code {
24 | text-decoration: underline;
25 | }
26 |
-------------------------------------------------------------------------------- /docs/_static/custom_cookietemple.css: --------------------------------------------------------------------------------
1 | @import "basic.css";
2 |
3 | /*Set max width to none so the theme uses all available width*/
4 | .wy-nav-content {
5 | max-width: none;
6 | }
7 |
-------------------------------------------------------------------------------- /docs/api/.gitignore: --------------------------------------------------------------------------------
1 | batchglm.*
2 |
-------------------------------------------------------------------------------- /docs/api/index.rst: --------------------------------------------------------------------------------
1 | .. automodule:: batchglm
2 |
3 | API
4 | ===
5 |
6 |
7 | Import batchglm's high-level API as::
8 |
9 | import batchglm.api as glm
10 |
11 |
12 | Fitting models
13 | -----------------------------------
14 |
15 | All models are collected in the :mod:`train` and `model` module.
16 | Each model consists of at least:
17 |
18 | 1) a `models.glm_nb.Model` class, which basically describes the model
19 | 2) a `train.xxxxx.Estimator` class, which takes a `Model` object and fits the corresponding model onto it.
20 |
21 | where `xxxxx` is the desired backend, like `tf2`, `numpy` or `statsmodels`.
22 |
23 | For example, here is a short snippet to give a sense of how the API might work::
24 |
25 | from batchglm.models.glm_nb import Model as NBModel
26 | from batchglm.train.numpy.glm_nb import Estimator as NBEstimator
27 | from batchglm.utils.input import InputDataGLM
28 |
29 | input_data = InputDataGLM(data=data_matrix, design_loc=_design_loc, design_scale=_design_scale, as_dask=as_dask)
30 | model = NBModel(input_data=input_data)
31 | estimator = NBEstimator(model=model, init_location="standard", init_scale="standard")
32 | estimator.initialize()
33 | estimator.train_sequence(training_strategy="DEFAULT")
34 | # Now you can perform statistical tests, for example, on parameters like model.theta_location.
35 |
36 | Currently implemented models:
37 |
38 | Negative Binomial
39 | ~~~~~~~~~~~~~~~~~
40 |
41 | .. autosummary::
42 | :toctree: .
43 |
44 | models.glm_nb.Model
45 | train.numpy.glm_nb.Estimator
46 |
47 | Normal
48 | ~~~~~~~~~~~~~~~~~
49 | .. autosummary::
50 | :toctree: .
51 |
52 | models.glm_norm.Model
53 | train.numpy.glm_norm.Estimator
54 |
55 | Poisson
56 | ~~~~~~~~~~~~~~~~~
57 | .. autosummary::
58 | :toctree: .
59 |
60 | models.glm_poisson.Model
61 | train.numpy.glm_poisson.Estimator
62 |
63 | Planned or Incomplete Models:
64 |
65 | Beta
66 | ~~~~~~~~~~~~~~~~~
67 |
68 | Data Utilities
69 | -----------------------------------
70 | We also provide some data utilities for working with things like design and constraint matrices.
71 |
72 | .. autosummary::
73 | :toctree: .
74 |
75 | utils.data.bin_continuous_covariate
76 | utils.data.constraint_matrix_from_string
77 | utils.data.constraint_system_from_star
78 | utils.data.design_matrix
79 | utils.data.preview_coef_names
80 | utils.data.string_constraints_from_dict
81 | utils.data.view_coef_names
82 | utils.input.InputDataGLM
83 |
-------------------------------------------------------------------------------- /docs/authors.rst: --------------------------------------------------------------------------------
1 | =======
2 | Credits
3 | =======
4 |
5 | Development Lead
6 | ----------------
7 |
8 | * Mario Picciani
9 |
10 | Contributors
11 | ------------
12 |
13 | None yet. Why not be the first?
14 |
-------------------------------------------------------------------------------- /docs/code_of_conduct.rst: --------------------------------------------------------------------------------
1 | .. include:: ../CODE_OF_CONDUCT.rst
2 |
-------------------------------------------------------------------------------- /docs/conf.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # mypy: ignore-errors
3 | # batchglm documentation build configuration file
4 | #
5 | # If extensions (or modules to document with autodoc) are in another
6 | # directory, add these directories to sys.path here. If the directory is
7 | # relative to the documentation root, use os.path.abspath to make it
8 | # absolute, like shown here.
9 | #
10 | import os
11 | import sys
12 |
13 | sys.path.insert(0, os.path.abspath(".."))
14 |
15 |
16 | # -- General configuration ---------------------------------------------
17 |
18 | # If your documentation needs a minimal Sphinx version, state it here.
19 | # needs_sphinx = '1.0'
20 |
21 | # Add any Sphinx extension module names here, as strings. They can be
22 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
23 |
24 | # Add 'sphinx_automodapi.automodapi' if you want to build modules
25 | extensions = [
26 | "sphinx.ext.viewcode",
27 | "sphinx.ext.autodoc",
28 | "sphinx.ext.autosummary",
29 | "sphinx.ext.napoleon",
30 | "sphinx_click",
31 | "sphinx_rtd_dark_mode",
32 | ]
33 |
34 | default_dark_mode = True
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ["_templates"]
38 |
39 | # The suffix(es) of source filenames.
40 | source_suffix = ".rst"
41 |
42 | # The master toctree document.
43 | master_doc = "index"
44 |
45 | # General information about the project.
46 | project = "batchglm"
47 | copyright = "2022, Mario Picciani"
48 | author = "Mario Picciani"
49 |
50 | # The version info for the project you're documenting, acts as replacement
51 | # for |version| and |release|, also used in various other places throughout
52 | # the built documents.
53 | #
54 | # The short X.Y version.
55 | version = "0.7.4"
56 | # The full version, including alpha/beta/rc tags.
57 | release = "0.7.4"
58 |
59 | # The language for content autogenerated by Sphinx. Refer to documentation
60 | # for a list of supported languages.
61 | #
62 | # This is also used if you do content translation via gettext catalogs.
63 | # Usually you set "language" from the command line for these cases.
64 | language = None
65 |
66 | # List of patterns, relative to source directory, that match files and
67 | # directories to ignore when looking for source files.
68 | # These patterns also affect html_static_path and html_extra_path.
69 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
70 |
71 | # The name of the Pygments (syntax highlighting) style to use.
72 | pygments_style = "sphinx"
73 |
74 | # If true, `todo` and `todoList` produce output, else they produce nothing.
75 | todo_include_todos = False
76 |
77 |
78 | # -- Options for HTML output -------------------------------------------
79 |
80 | # The theme to use for HTML and HTML Help pages. See the documentation for
81 | # a list of builtin themes.
82 | #
83 | html_theme = "sphinx_rtd_theme"
84 |
85 | # Theme options are theme-specific and customize the look and feel of a
86 | # theme further. For a list of options available for each theme, see the
87 | # documentation.
88 | #
89 | # html_theme_options = {}
90 |
91 | # Add any paths that contain custom static files (such as style sheets) here,
92 | # relative to this directory. They are copied after the builtin static files,
93 | # so a file named "default.css" will overwrite the builtin "default.css".
94 | html_static_path = ["_static"]
95 |
96 |
97 | # -- Options for HTMLHelp output ---------------------------------------
98 |
99 | # Output file base name for HTML help builder.
100 | htmlhelp_basename = "batchglmdoc"
101 |
102 |
103 | # -- Options for LaTeX output ------------------------------------------
104 |
105 | latex_elements = {
106 | # The paper size ("letterpaper" or "a4paper").
107 | #
108 | # "papersize": "letterpaper",
109 | # The font size ("10pt", "11pt" or "12pt").
110 | #
111 | # "pointsize": "10pt",
112 | # Additional stuff for the LaTeX preamble.
113 | #
114 | # "preamble": "",
115 | # Latex figure (float) alignment
116 | #
117 | # "figure_align": "htbp",
118 | }
119 |
120 | # Grouping the document tree into LaTeX files. List of tuples
121 | # (source start file, target name, title, author, documentclass
122 | # [howto, manual, or own class]).
123 | latex_documents = [
124 | (
125 | master_doc,
126 | "batchglm.tex",
127 | "batchglm Documentation",
128 | "Mario Picciani",
129 | "manual",
130 | ),
131 | ]
132 |
133 |
134 | # -- Options for manual page output ------------------------------------
135 |
136 | # One entry per manual page. List of tuples
137 | # (source start file, name, description, authors, manual section).
138 | man_pages = [
139 | (
140 | master_doc,
141 | "batchglm",
142 | "batchglm Documentation",
143 | [author],
144 | 1,
145 | )
146 | ]
147 |
148 | autodoc_typehints = "description"
149 |
150 |
151 | # -- Options for Texinfo output ----------------------------------------
152 |
153 | # Grouping the document tree into Texinfo files.
List of tuples 154 | # (source start file, target name, title, author, 155 | # dir menu entry, description, category) 156 | texinfo_documents = [ 157 | ( 158 | master_doc, 159 | "batchglm", 160 | "batchglm Documentation", 161 | author, 162 | "batchglm", 163 | "One line description of project.", 164 | "Miscellaneous", 165 | ), 166 | ] 167 | 168 | html_css_files = [ 169 | "custom_cookietemple.css", 170 | ] 171 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributor Guide 2 | ================= 3 | 4 | Thank you for your interest in improving this project. 5 | This project is open-source under the `BSD license`_ and 6 | highly welcomes contributions in the form of bug reports, feature requests, and pull requests. 7 | 8 | Here is a list of important resources for contributors: 9 | 10 | - `Source Code`_ 11 | - `Documentation`_ 12 | - `Issue Tracker`_ 13 | - `Code of Conduct`_ 14 | 15 | .. _BSD license: https://opensource.org/licenses/BSD 16 | .. _Source Code: https://github.com/theislab/batchglm 17 | .. _Documentation: https://batchglm.readthedocs.io/ 18 | .. _Issue Tracker: https://github.com/theislab/batchglm/issues 19 | 20 | How to report a bug 21 | ------------------- 22 | 23 | Report bugs on the `Issue Tracker`_. 24 | 25 | 26 | How to request a feature 27 | ------------------------ 28 | 29 | Request features on the `Issue Tracker`_. 30 | 31 | 32 | How to set up your development environment 33 | ------------------------------------------ 34 | 35 | You need Python 3.7+ and the following tools: 36 | 37 | - Poetry_ 38 | - Nox_ 39 | - nox-poetry_ 40 | 41 | You can install them with: 42 | 43 | .. code:: console 44 | 45 | $ pip install poetry nox nox-poetry 46 | 47 | Install the package with development requirements: 48 | 49 | .. code:: console 50 | 51 | $ make install 52 | 53 | You can now run an interactive Python session, 54 | or the command-line interface: 55 | 56 | .. code:: console 57 | 58 | $ poetry run python 59 | $ poetry run batchglm 60 | 61 | .. _Poetry: https://python-poetry.org/ 62 | .. _Nox: https://nox.thea.codes/ 63 | .. _nox-poetry: https://nox-poetry.readthedocs.io/ 64 | 65 | 66 | How to test the project 67 | ----------------------- 68 | 69 | Run the full test suite: 70 | 71 | .. code:: console 72 | 73 | $ nox 74 | 75 | List the available Nox sessions: 76 | 77 | .. code:: console 78 | 79 | $ nox --list-sessions 80 | 81 | You can also run a specific Nox session. 82 | For example, invoke the unit test suite like this: 83 | 84 | .. code:: console 85 | 86 | $ nox --session=tests 87 | 88 | Unit tests are located in the ``tests`` directory, 89 | and are written using the pytest_ testing framework. 90 | 91 | .. _pytest: https://pytest.readthedocs.io/ 92 | 93 | 94 | How to submit changes 95 | --------------------- 96 | 97 | Open a `pull request`_ to submit changes to this project against the ``development`` branch. 98 | 99 | Your pull request needs to meet the following guidelines for acceptance: 100 | 101 | - The Nox test suite must pass without errors and warnings. 102 | - Include unit tests. This project maintains a high code coverage. 103 | - If your changes add functionality, update the documentation accordingly. 104 | 105 | To run linting and code formatting checks before committing your change, you can install pre-commit as a Git hook by running the following command: 106 | 107 | .. 
code:: console
108 |
109 | $ nox --session=pre-commit -- install
110 |
111 | It is recommended to open an issue before starting work on anything.
112 | This will allow a chance to talk it over with the owners and validate your approach.
113 |
114 | .. _pull request: https://github.com/theislab/batchglm/pulls
115 | .. _Code of Conduct: CODE_OF_CONDUCT.rst
116 |
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 | ====================================
4 | Welcome to batchglm's documentation!
5 | ====================================
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 | :caption: Contents:
10 |
11 | readme
12 | installation
13 | api/index
14 | contributing
15 | authors
16 | code_of_conduct
17 |
18 | Indices and tables
19 | ==================
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 |
-------------------------------------------------------------------------------- /docs/installation.rst: --------------------------------------------------------------------------------
1 | .. highlight:: shell
2 |
3 | ============
4 | Installation
5 | ============
6 |
7 |
8 | Stable release
9 | --------------
10 |
11 | To install batchglm, run this command in your terminal:
12 |
13 | .. code-block:: console
14 |
15 | $ pip install batchglm
16 |
17 | This is the preferred method to install batchglm, as it will always install the most recent stable release.
18 |
19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide
20 | you through the process.
21 |
22 | .. _pip: https://pip.pypa.io
23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
24 |
25 |
26 | From sources
27 | ------------
28 |
29 | The sources for batchglm can be downloaded from the `Github repo`_.
30 | Please note that you require `poetry`_ to be installed.
31 |
32 | You can either clone the public repository:
33 |
34 | .. code-block:: console
35 |
36 | $ git clone git://github.com/theislab/batchglm
37 |
38 | Or download the `tarball`_:
39 |
40 | .. code-block:: console
41 |
42 | $ curl -OJL https://github.com/theislab/batchglm/tarball/master
43 |
44 | Once you have a copy of the source, you can install it with::
45 |
46 |
47 |     $ make install
48 |
49 |
50 | .. _Github repo: https://github.com/theislab/batchglm
51 | .. _tarball: https://github.com/theislab/batchglm/tarball/master
52 | .. _poetry: https://python-poetry.org/
53 |
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=python -msphinx
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=batchglm
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed,
20 | echo.then set the SPHINXBUILD environment variable to point to the full
21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the
22 | echo.Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. contents:: 5 | :local: 6 | :backlinks: none 7 | 8 | 9 | .. automodule:: batchglm.__main__ 10 | :members: -------------------------------------------------------------------------------- /docs/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx>=4.0.1 2 | sphinx_rtd_theme>=0.5.2 3 | sphinx-rtd-dark-mode>=1.2.1 4 | sphinx-automodapi>=0.13 5 | sphinx_click>=3.0.0 6 | click>=8.0.1 7 | -------------------------------------------------------------------------------- /docs/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | 5 | 6 | Fitting distributions 7 | --------------------- 8 | 9 | 10 | How to fit a `GLM `__. 11 | 12 | 13 | 14 | Other 15 | ----- 16 | 17 | Linear regression example using Tensorflow `GLM `__. 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. click:: batchglm.__main__:main 5 | :prog: batchglm 6 | :nested: full -------------------------------------------------------------------------------- /makefiles/Linux.mk: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | from urllib.request import pathname2url 8 | 9 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 10 | endef 11 | export BROWSER_PYSCRIPT 12 | 13 | define PRINT_HELP_PYSCRIPT 14 | import re, sys 15 | 16 | for line in sys.stdin: 17 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 18 | if match: 19 | target, help = match.groups() 20 | print("%-20s %s" % (target, help)) 21 | endef 22 | export PRINT_HELP_PYSCRIPT 23 | 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | clean-build: ## remove build artifacts 32 | rm -fr build/ 33 | rm -fr dist/ 34 | rm -fr .eggs/ 35 | find . -name '*.egg-info' -exec rm -fr {} + 36 | find . -name '*.egg' -exec rm -f {} + 37 | 38 | clean-pyc: ## remove Python file artifacts 39 | find . -name '*.pyc' -exec rm -f {} + 40 | find . -name '*.pyo' -exec rm -f {} + 41 | find . -name '*~' -exec rm -f {} + 42 | find . 
-name '__pycache__' -exec rm -fr {} +
43 |
44 | clean-test: ## remove test and coverage artifacts
45 | rm -fr .tox/
46 | rm -f .coverage
47 | rm -fr htmlcov/
48 | rm -fr .pytest_cache
49 |
50 | lint: ## check style with flake8
51 | flake8 batchglm tests
52 |
53 | test: ## run tests quickly with the default Python
54 | pytest
55 |
56 | test-all: ## run tests on every Python version with nox
57 | nox
58 |
59 | coverage: ## check code coverage quickly with the default Python
60 | coverage run --source batchglm -m pytest
61 | coverage report -m
62 | coverage html
63 | $(BROWSER) htmlcov/index.html
64 |
65 | docs: ## generate Sphinx HTML documentation, including API docs
66 | rm -f docs/batchglm.rst
67 | rm -f docs/modules.rst
68 | sphinx-apidoc -o docs/ batchglm
69 | $(MAKE) -C docs clean
70 | $(MAKE) -C docs html
71 | $(BROWSER) docs/_build/html/index.html
72 |
73 | servedocs: docs ## compile the docs watching for changes
74 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
75 |
76 | release: dist ## package and upload a release
77 | poetry publish
78 |
79 | dist: clean-build clean-pyc ## builds source and wheel package
80 | poetry build
81 |
82 | install: clean-build clean-pyc ## install the package to the active Python's site-packages
83 | poetry install
84 |
-------------------------------------------------------------------------------- /makefiles/Windows.mk: --------------------------------------------------------------------------------
1 | .PHONY: clean clean-test clean-pyc clean-build docs help
2 | .DEFAULT_GOAL := help
3 |
4 | define BROWSER_PYSCRIPT
5 | import os, webbrowser, sys
6 |
7 | from urllib.request import pathname2url
8 |
9 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
10 | endef
11 | export BROWSER_PYSCRIPT
12 |
13 | define PRINT_HELP_PYSCRIPT
14 | import re, sys
15 |
16 | for line in sys.stdin:
17 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
18 | if match:
19 | target, help = match.groups()
20 | print("%-20s %s" % (target, help))
21 | endef
22 | export PRINT_HELP_PYSCRIPT
23 |
24 | BROWSER := python -c "$$BROWSER_PYSCRIPT"
25 |
26 | help:
27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
28 |
29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
30 |
31 | clean-build: ## remove build artifacts
32 | if exist build rd /s /q build
33 | if exist dist rd /s /q dist
34 | if exist .eggs rd /s /q .eggs
35 | for /d /r . %%d in (*.egg-info) do @if exist "%%d" echo "%%d" && rd /s/q "%%d"
36 | del /q /s /f .\*.egg
37 |
38 |
39 | clean-pyc: ## remove Python file artifacts
40 | del /s /f /q .\*.pyc
41 | del /s /f /q .\*.pyo
42 | del /s /f /q .\*~
43 | for /d /r .
%%d in (*__pycache__) do @if exist "%%d" echo "%%d" && rd /s/q "%%d" 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | if exist .tox rd /s /q .tox 47 | if exist .coverage del /q .coverage 48 | if exist htmlcov rd /s /q htmlcov 49 | if exist .pytest_cache rd /s /q .pytest_cache 50 | 51 | lint: ## check style with flake8 52 | flake8 batchglm tests 53 | 54 | test: ## run tests quickly with the default Python 55 | pytest 56 | 57 | test-all: ## run tests on every Python version with nox 58 | nox 59 | 60 | coverage: ## check code coverage quickly with the default Python 61 | coverage run --source batchglm -m pytest 62 | coverage report -m 63 | coverage html 64 | $(BROWSER) htmlcov\index.html 65 | 66 | docs: ## generate Sphinx HTML documentation, including API docs 67 | del /f /q docs\batchglm.rst 68 | del /f /q docs\modules.rst 69 | sphinx-apidoc -o docs batchglm 70 | $(MAKE) -C docs clean 71 | $(MAKE) -C docs html 72 | $(BROWSER) docs\_build\html\index.html 73 | 74 | servedocs: docs ## compile the docs watching for changes 75 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 76 | 77 | release: dist ## package and upload a release 78 | poetry publish 79 | 80 | dist: clean-build clean-pyc ## builds source and wheel package 81 | poetry build 82 | 83 | install: clean-build clean-pyc ## install the package to the active Python's site-packages 84 | poetry install 85 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """Nox sessions.""" 2 | import shutil 3 | import sys 4 | from pathlib import Path 5 | from textwrap import dedent 6 | 7 | import nox 8 | from rich import print 9 | 10 | try: 11 | from nox_poetry import Session, session 12 | except ImportError: 13 | print("[bold red]Did not find nox-poetry installed in your current environment!") 14 | print("[bold blue]Try installing it using [bold green]pip install nox-poetry [bold blue]! ") 15 | sys.exit(1) 16 | 17 | package = "batchglm" 18 | python_versions = ["3.8", "3.9"] 19 | nox.options.sessions = ( 20 | "pre-commit", 21 | "safety", 22 | "mypy", 23 | "tests", 24 | "xdoctest", 25 | "docs-build", 26 | ) 27 | 28 | 29 | def activate_virtualenv_in_precommit_hooks(session: Session) -> None: 30 | """Activate virtualenv in hooks installed by pre-commit. 31 | 32 | This function patches git hooks installed by pre-commit to activate the 33 | session's virtual environment. This allows pre-commit to locate hooks in 34 | that environment when invoked from git. 35 | 36 | Args: 37 | session: The Session object. 
38 | """ 39 | if session.bin is None: 40 | return 41 | 42 | virtualenv = session.env.get("VIRTUAL_ENV") 43 | if virtualenv is None: 44 | return 45 | 46 | hookdir = Path(".git") / "hooks" 47 | if not hookdir.is_dir(): 48 | return 49 | 50 | for hook in hookdir.iterdir(): 51 | if hook.name.endswith(".sample") or not hook.is_file(): 52 | continue 53 | 54 | text = hook.read_text() 55 | bindir = repr(session.bin)[1:-1] # strip quotes 56 | if not (Path("A") == Path("a") and bindir.lower() in text.lower() or bindir in text): 57 | continue 58 | 59 | lines = text.splitlines() 60 | if not (lines[0].startswith("#!") and "python" in lines[0].lower()): 61 | continue 62 | 63 | header = dedent( 64 | f"""\ 65 | import os 66 | os.environ["VIRTUAL_ENV"] = {virtualenv!r} 67 | os.environ["PATH"] = os.pathsep.join(( 68 | {session.bin!r}, 69 | os.environ.get("PATH", ""), 70 | )) 71 | """ 72 | ) 73 | 74 | lines.insert(1, header) 75 | hook.write_text("\n".join(lines)) 76 | 77 | 78 | @session(name="pre-commit", python=python_versions) 79 | def precommit(session: Session) -> None: 80 | """Lint using pre-commit.""" 81 | args = session.posargs or ["run", "--all-files"] 82 | session.install( 83 | "black", 84 | "darglint", 85 | "flake8", 86 | "flake8-bandit", 87 | "flake8-bugbear", 88 | "flake8-docstrings", 89 | "flake8-rst-docstrings", 90 | "pep8-naming", 91 | "pre-commit", 92 | "pre-commit-hooks", 93 | "reorder-python-imports", 94 | ) 95 | session.run("pre-commit", *args) 96 | if args and args[0] == "install": 97 | activate_virtualenv_in_precommit_hooks(session) 98 | 99 | 100 | @session(python=python_versions) 101 | def safety(session: Session) -> None: 102 | """Scan dependencies for insecure packages.""" 103 | requirements = session.poetry.export_requirements() 104 | session.install("safety") 105 | session.run("safety", "check", "--full-report", f"--file={requirements}") 106 | 107 | 108 | @session(python=python_versions) 109 | def mypy(session: Session) -> None: 110 | """Type-check using mypy.""" 111 | args = session.posargs or ["batchglm", "tests", "docs/conf.py"] 112 | session.install(".") 113 | session.install("mypy", "pytest", "types-pkg-resources", "types-requests", "types-attrs") 114 | session.run("mypy", *args) 115 | 116 | 117 | @session(python=python_versions) 118 | def tests(session: Session) -> None: 119 | """Run the test suite.""" 120 | session.install(".") 121 | session.install("coverage[toml]", "pytest", "pygments") 122 | try: 123 | session.run("coverage", "run", "--parallel", "-m", "pytest", *session.posargs) 124 | finally: 125 | if session.interactive: 126 | session.notify("coverage") 127 | 128 | 129 | @session 130 | def coverage(session: Session) -> None: 131 | """Produce the coverage report.""" 132 | # Do not use session.posargs unless this is the only session. 
133 | nsessions = len(session._runner.manifest) # type: ignore[attr-defined] 134 | has_args = session.posargs and nsessions == 1 135 | args = session.posargs if has_args else ["report", "-i"] 136 | 137 | session.install("coverage[toml]") 138 | 139 | if not has_args and any(Path().glob(".coverage.*")): 140 | session.run("coverage", "combine") 141 | 142 | session.run("coverage", *args) 143 | 144 | 145 | @session(python=python_versions) 146 | def typeguard(session: Session) -> None: 147 | """Runtime type checking using Typeguard.""" 148 | session.install(".") 149 | session.install("pytest", "typeguard", "pygments") 150 | session.run("pytest", f"--typeguard-packages={package}", *session.posargs) 151 | 152 | 153 | @session(python=python_versions) 154 | def xdoctest(session: Session) -> None: 155 | """Run examples with xdoctest.""" 156 | args = session.posargs or ["all"] 157 | session.install(".") 158 | session.install("xdoctest[colors]") 159 | session.run("python", "-m", "xdoctest", package, *args) 160 | 161 | 162 | @session(name="docs-build", python=python_versions) 163 | def docs_build(session: Session) -> None: 164 | """Build the documentation.""" 165 | args = session.posargs or ["docs", "docs/_build"] 166 | session.install(".") 167 | session.install("sphinx", "sphinx-click", "sphinx-rtd-theme", "sphinx-rtd-dark-mode") 168 | 169 | build_dir = Path("docs", "_build") 170 | if build_dir.exists(): 171 | shutil.rmtree(build_dir) 172 | 173 | session.run("sphinx-build", *args) 174 | 175 | 176 | @session(python=python_versions) 177 | def docs(session: Session) -> None: 178 | """Build and serve the documentation with live reloading on file changes.""" 179 | args = session.posargs or ["--open-browser", "docs", "docs/_build"] 180 | session.install(".") 181 | session.install("sphinx", "sphinx-autobuild", "sphinx-click", "sphinx-rtd-theme", "sphinx-rtd-dark-mode") 182 | 183 | build_dir = Path("docs", "_build") 184 | if build_dir.exists(): 185 | shutil.rmtree(build_dir) 186 | 187 | session.run("sphinx-autobuild", *args) 188 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "batchglm" 3 | version = "0.7.4" # <> 4 | description = "Fast and scalable fitting of over-determined generalized-linear models (GLMs)" 
5 | authors = ["Mario Picciani "] 6 | license = "BSD" 7 | readme = "README.rst" 8 | homepage = "https://github.com/theislab/batchglm" 9 | repository = "https://github.com/theislab/batchglm" 10 | documentation = "https://batchglm.readthedocs.io" 11 | packages = [ 12 | { include = "batchglm" }, 13 | ] 14 | classifiers = [ 15 | "Programming Language :: Python :: 3.6", 16 | "Programming Language :: Python :: 3.7", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | ] 20 | 21 | 22 | [tool.poetry.dependencies] 23 | python = ">=3.8.0, <3.10.0" 24 | click = "^8.0.0" 25 | rich = "^10.3.0" 26 | PyYAML = "^5.4.1" 27 | dask = "2021.4.1" # https://github.com/theislab/diffxpy/issues/194 28 | numpy = ">=1.22.2" 29 | patsy = "^0.5.2" 30 | scipy = "^1.7.3" 31 | pandas = "^1.4.0" 32 | anndata = "^0.7.8" 33 | sparse = "0.9.1" # https://github.com/theislab/diffxpy/issues/194 34 | matplotlib = "^3.5.1" 35 | sphinx-autodoc-typehints = "^1.16.0" 36 | seaborn = "^0.11.2" 37 | bandit = "1.7.2" 38 | 39 | [tool.poetry.dev-dependencies] 40 | pytest = "^6.2.3" 41 | coverage = {extras = ["toml"], version = "^5.3"} 42 | safety = "^1.9.0" 43 | typeguard = "^2.12.0" 44 | xdoctest = {extras = ["colors"], version = "^0.15.0"} 45 | sphinx = "^4.0.2" 46 | sphinx-autobuild = "^2021.3.14" 47 | pre-commit = "^2.11.1" 48 | flake8 = "^3.8.4" 49 | black = ">=21.12b0" 50 | flake8-bandit = "^2.1.2" 51 | flake8-bugbear = "^21.4.3" 52 | flake8-docstrings = "^1.5.0" 53 | flake8-rst-docstrings = "^0.2.3" 54 | pep8-naming = "^0.11.1" 55 | darglint = "^1.5.8" 56 | reorder-python-imports = "^2.5.0" 57 | pre-commit-hooks = "^4.0.1" 58 | sphinx-rtd-theme = "^0.5.0" 59 | sphinx-click = "^3.0.0" 60 | Pygments = "^2.8.1" 61 | types-pkg-resources = "^0.1.2" 62 | types-requests = "^2.25.2" 63 | types-attrs = "^19.1.0" 64 | sphinx-rtd-dark-mode = "^1.2.3" 65 | Jinja2 = "^3.0.1" 66 | mypy = "^0.910" 67 | matplotlib = "^3.5.1" 68 | nox = "^2022.1.7" 69 | cookietemple = "^1.3.11" 70 | nox-poetry = "^0.9.0" 71 | 72 | [tool.poetry.scripts] 73 | batchglm = "batchglm.__main__:main" 74 | 75 | [tool.black] 76 | line-length = 120 77 | 78 | [tool.mypy] 79 | strict = false 80 | pretty = true 81 | show_column_numbers = true 82 | show_error_codes = true 83 | show_error_context = true 84 | ignore_missing_imports = true 85 | exclude = "_version.py" 86 | 87 | [tool.isort] 88 | multi_line_output=3 89 | include_trailing_comma=true 90 | balanced_wrapping=true 91 | line_length=120 92 | 93 | [tool.coverage.paths] 94 | source = ["batchglm", "*/site-packages"] 95 | 96 | [tool.coverage.run] 97 | branch = true 98 | source = ["batchglm"] 99 | 100 | [tool.coverage.report] 101 | show_missing = true 102 | 103 | [build-system] 104 | requires = ["poetry-core>=1.0.0"] 105 | build-backend = "poetry.core.masonry.api" 106 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anndata==0.7.8 2 | dask==2021.3.0 3 | numpy>=1.16.4 4 | pandas==1.1.5 5 | patsy==0.5.2 6 | pytest==6.2.5 7 | scipy>=1.2.1 8 | sparse==0.9.1 9 | toolz==0.11.2 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = batchglm/_version.py 5 | versionfile_build = batchglm/_version.py 6 | tag_prefix = 7 | 8 | [build_ext] 9 | inplace = 1 10 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | import versioneer 4 | 5 | author = "David S. Fischer, Florian R. Hölzlwimmer, Sabrina Richter" 6 | author_email = "david.fischer@helmholtz-muenchen.de" 7 | description = "Fast and scalable fitting of over-determined generalized-linear models (GLMs)" 8 | 9 | with open("README.md", "r") as fh: 10 | long_description = fh.read() 11 | 12 | setup( 13 | name="batchglm", 14 | author=author, 15 | author_email=author_email, 16 | description=description, 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | packages=find_packages(), 20 | install_requires=["anndata", "numpy>=1.16.4", "scipy>=1.2.1", "pandas", "dask", "toolz", "patsy", "sparse"], 21 | extras_require={ 22 | "plotting_deps": ["matplotlib", "seaborn"], 23 | "docs": [ 24 | "sphinx", 25 | "sphinx-autodoc-typehints", 26 | "sphinx_rtd_theme", 27 | "jinja2", 28 | "docutils", 29 | ], 30 | }, 31 | version=versioneer.get_version(), 32 | cmdclass=versioneer.get_cmdclass(), 33 | ) 34 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test suite for the batchglm package.""" 2 | -------------------------------------------------------------------------------- /tests/numpy/test_accuracy.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | 4 | import numpy as np 5 | from utils import get_estimator, get_generated_model 6 | 7 | from batchglm import pkg_constants 8 | from batchglm.models.base_glm import ModelGLM 9 | from batchglm.train.numpy.base_glm import EstimatorGlm 10 | 11 | logger = logging.getLogger("batchglm") 12 | 13 | NB_OPTIMIZERS = ["GD", "ADAM", "ADAGRAD", "RMSPROP", "NR", "NR_TR", "IRLS", "IRLS_GD", "IRLS_TR", "IRLS_GD_TR"] 14 | NORM_OPTIMIZERS = ["GD", "ADAM", "ADAGRAD", "RMSPROP", "NR", "NR_TR", "IRLS", "IRLS_TR"] 15 | BETA_OPTIMIZERS = ["GD", "ADAM", "ADAGRAD", "RMSPROP", "NR", "NR_TR"] 16 | 17 | 18 | pkg_constants.TRUST_REGION_T1 = 0.5 19 | pkg_constants.TRUST_REGION_T2 = 1.5 20 | pkg_constants.CHOLESKY_LSTSQS = True 21 | pkg_constants.CHOLESKY_LSTSQS_BATCHED = True 22 | pkg_constants.JACOBIAN_MODE = "analytic" 23 | 24 | 25 | class TestAccuracy(unittest.TestCase): 26 | def eval_estimation(self, estimator: EstimatorGlm): 27 | mean_thres_location = 0.2 28 | mean_thres_scale = 0.2 29 | std_thres_location = 1 30 | std_thres_scale = 1 31 | 32 | def deviation_theta(true: np.ndarray, pred: np.ndarray, mean_thres: float, std_thres: float) -> bool: 33 | relative_deviation = (pred - true) / true 34 | mean = np.mean(relative_deviation) 35 | std = np.std(relative_deviation) 36 | logger.info(f"Relative deviation theta: {mean} (mean), {std} (std)") 37 | return np.abs(mean) <= mean_thres and std <= std_thres 38 | 39 | success = True 40 | if estimator.train_loc: 41 | success = deviation_theta( 42 | true=estimator.model_container.model._theta_location, 43 | pred=estimator.model_container.theta_location, 44 | mean_thres=mean_thres_location, 45 | std_thres=std_thres_location, 46 | ) 47 | if estimator.train_scale: 48 | success &= deviation_theta( 49 | true=estimator.model_container.model._theta_scale, 50 | pred=estimator.model_container.theta_scale, 51 | 
mean_thres=mean_thres_scale, 52 | std_thres=std_thres_scale, 53 | ) 54 | return success 55 | 56 | def _test_accuracy(self, estimator: EstimatorGlm) -> bool: 57 | """Runs the estimator to fit the model and evaluates with respect to the simulated parameters.""" 58 | estimator.initialize() 59 | estimator.train_sequence(training_strategy="DEFAULT") 60 | success = self.eval_estimation(estimator) 61 | if not success: 62 | logger.warning("Estimator did not yield exact results") 63 | return success 64 | 65 | 66 | class TestAccuracyNB(TestAccuracy): 67 | def test_accuracy_rand_theta(self): 68 | """ 69 | This tests randTheta simulated data with 2 conditions and 4 batches sparse and dense. 70 | """ 71 | dense_model = get_generated_model( 72 | noise_model="nb", num_conditions=2, num_batches=4, sparse=False, mode="randTheta" 73 | ) 74 | sparse_model = get_generated_model( 75 | noise_model="nb", num_conditions=2, num_batches=4, sparse=True, mode="randTheta" 76 | ) 77 | dense_estimator = get_estimator( 78 | noise_model="nb", model=dense_model, init_location="standard", init_scale="standard" 79 | ) 80 | assert self._test_accuracy(dense_estimator) 81 | 82 | sparse_estimator = get_estimator( 83 | noise_model="nb", model=sparse_model, init_location="standard", init_scale="standard" 84 | ) 85 | assert self._test_accuracy(sparse_estimator) 86 | 87 | def test_accuracy_const_theta(self): 88 | """ 89 | This tests constTheta simulated data with 2 conditions and 0 batches sparse and dense. 90 | """ 91 | dense_model = get_generated_model( 92 | noise_model="nb", num_conditions=2, num_batches=0, sparse=False, mode="constTheta" 93 | ) 94 | sparse_model = get_generated_model( 95 | noise_model="nb", num_conditions=2, num_batches=0, sparse=True, mode="constTheta" 96 | ) 97 | 98 | dense_estimator = get_estimator( 99 | noise_model="nb", model=dense_model, init_location="standard", init_scale="standard" 100 | ) 101 | assert self._test_accuracy(dense_estimator) 102 | 103 | sparse_estimator = get_estimator( 104 | noise_model="nb", model=sparse_model, init_location="standard", init_scale="standard" 105 | ) 106 | assert self._test_accuracy(sparse_estimator) 107 | 108 | 109 | class TestAccuracyPoisson(TestAccuracy): 110 | def test_accuracy_rand_theta(self): 111 | """ 112 | This tests randTheta simulated data with 2 conditions and 4 batches sparse and dense. 113 | """ 114 | dense_model = get_generated_model( 115 | noise_model="poisson", num_conditions=2, num_batches=4, sparse=False, mode="randTheta" 116 | ) 117 | sparse_model = get_generated_model( 118 | noise_model="poisson", num_conditions=2, num_batches=4, sparse=True, mode="randTheta" 119 | ) 120 | dense_estimator = get_estimator( 121 | noise_model="poisson", model=dense_model, init_location="standard", init_scale="standard" 122 | ) 123 | assert self._test_accuracy(dense_estimator) 124 | 125 | sparse_estimator = get_estimator( 126 | noise_model="poisson", model=sparse_model, init_location="standard", init_scale="standard" 127 | ) 128 | assert self._test_accuracy(sparse_estimator) 129 | 130 | def test_accuracy_const_theta(self): 131 | """ 132 | This tests constTheta simulated data with 2 conditions and 0 batches sparse and dense. 
133 | """ 134 | dense_model = get_generated_model( 135 | noise_model="poisson", num_conditions=2, num_batches=0, sparse=False, mode="constTheta" 136 | ) 137 | sparse_model = get_generated_model( 138 | noise_model="poisson", num_conditions=2, num_batches=0, sparse=True, mode="constTheta" 139 | ) 140 | 141 | dense_estimator = get_estimator( 142 | noise_model="poisson", model=dense_model, init_location="standard", init_scale="standard" 143 | ) 144 | assert self._test_accuracy(dense_estimator) 145 | 146 | sparse_estimator = get_estimator( 147 | noise_model="poisson", model=sparse_model, init_location="standard", init_scale="standard" 148 | ) 149 | assert self._test_accuracy(sparse_estimator) 150 | 151 | 152 | class TestAccuracyNorm(TestAccuracy): 153 | def test_accuracy_rand_theta(self): 154 | """ 155 | This tests randTheta simulated data with 2 conditions and 4 batches sparse and dense. 156 | """ 157 | dense_model = get_generated_model( 158 | noise_model="norm", num_conditions=2, num_batches=4, sparse=False, mode="randTheta" 159 | ) 160 | sparse_model = get_generated_model( 161 | noise_model="norm", num_conditions=2, num_batches=4, sparse=True, mode="randTheta" 162 | ) 163 | dense_estimator = get_estimator(noise_model="norm", model=dense_model) 164 | assert self._test_accuracy(dense_estimator) 165 | 166 | sparse_estimator = get_estimator(noise_model="norm", model=sparse_model) 167 | assert self._test_accuracy(sparse_estimator) 168 | 169 | def test_accuracy_const_theta(self): 170 | """ 171 | This tests constTheta simulated data with 2 conditions and 0 batches sparse and dense. 172 | """ 173 | dense_model = get_generated_model( 174 | noise_model="norm", num_conditions=2, num_batches=0, sparse=False, mode="constTheta" 175 | ) 176 | sparse_model = get_generated_model( 177 | noise_model="norm", num_conditions=2, num_batches=0, sparse=True, mode="constTheta" 178 | ) 179 | 180 | dense_estimator = get_estimator(noise_model="norm", model=dense_model) 181 | assert self._test_accuracy(dense_estimator) 182 | 183 | sparse_estimator = get_estimator(noise_model="norm", model=sparse_model) 184 | assert self._test_accuracy(sparse_estimator) 185 | 186 | 187 | if __name__ == "__main__": 188 | unittest.main() 189 | -------------------------------------------------------------------------------- /tests/numpy/test_accuracy_extreme_values.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | from test_accuracy import TestAccuracy 7 | from utils import get_estimator, get_generated_model 8 | 9 | logger = logging.getLogger("batchglm") 10 | # logging.getLogger("batchglm").setLevel(logging.WARNING) 11 | 12 | 13 | class _TestAccuracyXtremeAll(TestAccuracy): 14 | """ 15 | Test whether numerical extremes throw error in initialisation or during first training steps. 
16 | """ 17 | 18 | def _test_accuracy_extreme_values(self, idx: Union[List[int], int, np.ndarray], val: float, noise_model: str): 19 | model = get_generated_model(noise_model=noise_model, num_conditions=2, num_batches=4, sparse=False, mode=None) 20 | model._x[:, idx] = val 21 | estimator = get_estimator(noise_model=noise_model, model=model, init_location="standard", init_scale="standard") 22 | return self._test_accuracy(estimator) 23 | 24 | def _test_low_values(self, **kwargs): 25 | return self._test_accuracy_extreme_values(idx=0, val=0.0, **kwargs) 26 | 27 | def _test_zero_variance(self, **kwargs): 28 | self._modify_sim(idx=0, val=5.0, **kwargs) 29 | return self.basic_test(batched=False, train_loc=True, train_scale=True, sparse=False) 30 | 31 | 32 | class TestAccuracyXtremeNb(_TestAccuracyXtremeAll): 33 | """ 34 | Test whether optimizers yield exact results for negative binomial distributed data. 35 | """ 36 | 37 | def test_nb(self) -> bool: 38 | np.random.seed(1) 39 | ret_val = self._test_low_values(noise_model="nb") 40 | np.random.seed(1) 41 | return ret_val and self._test_zero_variance(noise_model="nb") 42 | 43 | 44 | class TestAccuracyXtremeNorm(_TestAccuracyXtremeAll): 45 | """ 46 | Test whether optimizers yield exact results for normal distributed data. 47 | """ 48 | 49 | def test_norm(self) -> bool: 50 | logger.error("TestAccuracyXtremeNorm.test_norm()") 51 | logger.info("Normal noise model not implemented for numpy") 52 | 53 | np.random.seed(1) 54 | ret_val = self._test_low_values(noise_model="norm") 55 | np.random.seed(1) 56 | return ret_val and self._test_zero_variance(noise_model="nb") 57 | 58 | 59 | class TestAccuracyXtremeBeta(_TestAccuracyXtremeAll): 60 | """ 61 | Test whether optimizers yield exact results for beta distributed data. 62 | """ 63 | 64 | def test_beta(self) -> bool: 65 | logger.error("TestAccuracyXtremeBeta.test_beta()") 66 | logger.info("Beta noise model not implemented for numpy") 67 | 68 | # np.random.seed(1) 69 | # self._test_low_values(noise_model="beta") 70 | # self._test_zero_variance(noise_model="beta") 71 | return True 72 | 73 | 74 | class TestAccuracyXtremePoisson(_TestAccuracyXtremeAll): 75 | """ 76 | Test whether optimizers yield exact results for Poisson distributed data. 
77 | """ 78 | 79 | def test_poisson(self) -> bool: 80 | logger.error("TestAccuracyXtremePoisson.test_poisson()") 81 | logger.info("Poisson noise model not implemented for numpy") 82 | 83 | np.random.seed(1) 84 | self._test_low_values(noise_model="poisson") 85 | # self._test_zero_variance(noise_model="poisson") 86 | return True 87 | 88 | 89 | if __name__ == "__main__": 90 | unittest.main() 91 | -------------------------------------------------------------------------------- /tests/numpy/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | 5 | from batchglm.models.base_glm import ModelGLM 6 | from batchglm.models.glm_beta import Model as BetaModel 7 | from batchglm.models.glm_nb import Model as NBModel 8 | from batchglm.models.glm_norm import Model as NormModel 9 | from batchglm.models.glm_poisson import Model as PoissonModel 10 | from batchglm.train.numpy.base_glm import EstimatorGlm 11 | from batchglm.train.numpy.glm_nb import Estimator as NBEstimator 12 | from batchglm.train.numpy.glm_norm import Estimator as NormEstimator 13 | from batchglm.train.numpy.glm_poisson import Estimator as PoissonEstimator 14 | 15 | 16 | def get_estimator(noise_model: str, **kwargs) -> EstimatorGlm: 17 | if noise_model == "nb": 18 | return NBEstimator(**kwargs) 19 | elif noise_model == "norm": 20 | return NormEstimator(**kwargs) 21 | # estimator = NormEstimator(**kwargs) 22 | elif noise_model == "beta": 23 | raise NotImplementedError("Beta Estimator is not yet implemented.") 24 | # estimator = BetaEstimator(**kwargs) 25 | elif noise_model == "poisson": 26 | return PoissonEstimator(**kwargs) 27 | raise ValueError(f"Noise model {noise_model} not recognized.") 28 | 29 | 30 | def get_model(noise_model: str) -> ModelGLM: 31 | if noise_model is None: 32 | raise ValueError("noise_model is None") 33 | if noise_model == "nb": 34 | return NBModel() 35 | elif noise_model == "norm": 36 | return NormModel() 37 | elif noise_model == "beta": 38 | return BetaModel() 39 | elif noise_model == "poisson": 40 | return PoissonModel() 41 | raise ValueError(f"Noise model {noise_model} not recognized.") 42 | 43 | 44 | def get_generated_model( 45 | noise_model: str, num_conditions: int, num_batches: int, sparse: bool, mode: Optional[str] = None 46 | ) -> ModelGLM: 47 | model = get_model(noise_model=noise_model) 48 | 49 | def random_uniform(low: float, high: float): 50 | return lambda shape: np.random.uniform(low=low, high=high, size=shape) 51 | 52 | def const(offset: float): 53 | return lambda shape: np.zeros(shape) + offset 54 | 55 | if mode is None: 56 | """Sample loc and scale with default functions""" 57 | rand_fn_ave = None 58 | rand_fn_loc = None 59 | rand_fn_scale = None 60 | 61 | elif mode == "randTheta": 62 | 63 | if noise_model in ["nb", "norm", "poisson"]: 64 | # too large mean breaks poisson 65 | rand_fn_ave = random_uniform(10, 1000 if noise_model != "poisson" else 15) 66 | rand_fn_loc = random_uniform(1, 3) 67 | rand_fn_scale = random_uniform(1, 3) 68 | elif noise_model == "beta": 69 | rand_fn_ave = random_uniform(0.1, 0.7) 70 | rand_fn_loc = random_uniform(0.0, 0.15) 71 | rand_fn_scale = random_uniform(0.0, 0.15) 72 | else: 73 | raise ValueError(f"Noise model {noise_model} not recognized.") 74 | 75 | elif mode == "constTheta": 76 | 77 | if noise_model in ["nb", "norm", "poisson"]: 78 | # too large mean breaks poisson 79 | rand_fn_ave = random_uniform(10, 1000 if noise_model != "poisson" else 15) 80 | rand_fn_loc = 
const(1.0) 81 | rand_fn_scale = const(1.0) 82 | elif noise_model == "beta": 83 | rand_fn_ave = random_uniform(0.1, 0.9) 84 | rand_fn_loc = const(0.05) 85 | rand_fn_scale = const(0.2) 86 | else: 87 | raise ValueError(f"Noise model {noise_model} not recognized.") 88 | 89 | else: 90 | raise ValueError(f"Mode {mode} not recognized.") 91 | 92 | model.generate_artificial_data( 93 | n_obs=2000, 94 | n_vars=100, 95 | num_conditions=num_conditions, 96 | num_batches=num_batches, 97 | intercept_scale=True, 98 | sparse=sparse, 99 | rand_fn_ave=rand_fn_ave, 100 | rand_fn_loc=rand_fn_loc, 101 | rand_fn_scale=rand_fn_scale, 102 | ) 103 | return model 104 | -------------------------------------------------------------------------------- /tests/run_data_utils_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from batchglm.utils.data import constraint_system_from_star 8 | 9 | logger = logging.getLogger("batchglm") 10 | 11 | 12 | class TestConstraintSystemFromStar(unittest.TestCase): 13 | 14 | true_cmat = np.array( 15 | [ 16 | [1.0, 0.0, 0.0, 0.0], 17 | [0.0, 1.0, 0.0, 0.0], 18 | [0.0, 0.0, -1.0, 0.0], 19 | [0.0, 0.0, 1.0, 0.0], 20 | [0.0, 0.0, 0.0, -1.0], 21 | [0.0, 0.0, 0.0, 1.0], 22 | ] 23 | ) 24 | 25 | true_cmat_list = np.array( 26 | [[-1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]] 27 | ) 28 | 29 | true_cmat_array = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) 30 | 31 | true_dmat = np.array( 32 | [ 33 | [1.0, 0.0, 0.0, 1.0, 0.0, 0.0], 34 | [0.0, 1.0, 0.0, 0.0, 1.0, 0.0], 35 | [0.0, 1.0, 0.0, 0.0, 1.0, 0.0], 36 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0], 37 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0], 38 | [0.0, 1.0, 0.0, 0.0, 0.0, 1.0], 39 | ] 40 | ) 41 | 42 | true_dmat_list = np.array( 43 | [ 44 | [1.0, 0.0, 1.0, 0.0, 0.0], 45 | [0.0, 1.0, 0.0, 1.0, 0.0], 46 | [0.0, 1.0, 0.0, 1.0, 0.0], 47 | [1.0, 0.0, 0.0, 0.0, 0.0], 48 | [1.0, 0.0, 0.0, 0.0, 0.0], 49 | [0.0, 1.0, 0.0, 0.0, 1.0], 50 | ] 51 | ) 52 | 53 | true_dmat_array = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0]]) 54 | 55 | true_terms = ["condition", "batch"] 56 | true_coefs = ["condition[0]", "condition[1]", "batch[0]", "batch[1]", "batch[2]", "batch[3]"] 57 | 58 | true_terms_list = ["condition[0]", "condition[1]", "batch[T.1]", "batch[T.2]", "batch[T.3]"] 59 | true_coefs_list = ["condition[0]", "condition[1]", "batch[T.1]", "batch[T.2]", "batch[T.3]"] 60 | 61 | true_terms_array = ["condition[0]", "condition[1]", "batch[T.1]"] 62 | true_coefs_array = ["condition[0]", "condition[1]", "batch[T.1]"] 63 | 64 | # dict tests 65 | 66 | def execute_test_dict(self, *args, **kwargs): 67 | dmat, coef_names, cmat, term_names = constraint_system_from_star(*args, **kwargs) 68 | assert term_names == self.true_terms 69 | assert coef_names == self.true_coefs 70 | assert np.all(np.equal(cmat, self.true_cmat)) 71 | assert np.all(np.equal(dmat, self.true_dmat)) 72 | 73 | def test_constraint_system_dict(self): 74 | formula = "~0 + condition + batch" 75 | sample_description = pd.DataFrame({"condition": [0, 1, 1, 0, 0, 1], "batch": [1, 2, 2, 0, 0, 3]}) 76 | constraints = {"batch": "condition"} 77 | self.execute_test_dict(constraints, sample_description=sample_description, formula=formula) 78 | 79 | # list tests 80 | 81 | def execute_test_list(self, *args, **kwargs): 82 | dmat, coef_names, cmat, term_names = constraint_system_from_star(*args, 
**kwargs) 83 | assert term_names == self.true_terms_list 84 | assert coef_names == self.true_coefs_list 85 | assert np.all(np.equal(cmat, self.true_cmat_list)) 86 | assert np.all(np.equal(dmat, self.true_dmat_list)) 87 | 88 | def test_constraint_system_list(self): 89 | formula = "~0 + condition + batch" 90 | sample_description = pd.DataFrame({"condition": [0, 1, 1, 0, 0, 1], "batch": [1, 2, 2, 0, 0, 3]}) 91 | constraints = ["condition[0] + condition[1] = 0"] 92 | self.execute_test_list(constraints, sample_description=sample_description, formula=formula) 93 | 94 | def test_constraint_system_list_with_dmat(self): 95 | constraints = ["condition[0] + condition[1] = 0"] 96 | dmat = pd.DataFrame(self.true_dmat_list, columns=self.true_coefs_list) 97 | self.execute_test_list(constraints, dmat=dmat) 98 | 99 | # array tests 100 | 101 | def execute_test_array(self, *args, **kwargs): 102 | dmat, coef_names, cmat, term_names = constraint_system_from_star(*args, **kwargs) 103 | assert term_names == self.true_terms_array 104 | assert coef_names == self.true_coefs_array 105 | assert np.all(np.equal(cmat, self.true_cmat_array)) 106 | assert np.all(np.equal(dmat, self.true_dmat_array)) 107 | 108 | def test_constraint_system_array(self): 109 | formula = "~0 + condition + batch" 110 | sample_description = pd.DataFrame({"condition": [0, 1, 0, 1], "batch": [1, 0, 0, 1]}) 111 | constraints = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) 112 | self.execute_test_array(constraints, sample_description=sample_description, formula=formula) 113 | 114 | def test_constraint_system_array_with_dmat(self): 115 | constraints = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) 116 | dmat = pd.DataFrame(self.true_dmat_array, columns=self.true_coefs_array) 117 | self.execute_test_array(constraints, dmat=dmat) 118 | 119 | 120 | if __name__ == "__main__": 121 | unittest.main() 122 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | """Test cases for the __main__ module.""" 2 | import pytest 3 | from click.testing import CliRunner 4 | 5 | from batchglm import __main__ 6 | 7 | 8 | @pytest.fixture 9 | def runner() -> CliRunner: 10 | """Fixture for invoking command-line interfaces.""" 11 | return CliRunner() 12 | 13 | 14 | def test_main_succeeds(runner: CliRunner) -> None: 15 | """It exits with a status code of zero.""" 16 | result = runner.invoke(__main__.main) 17 | assert result.exit_code == 0 18 | -------------------------------------------------------------------------------- /tests/test_types_dmat.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from typing import List, Union 3 | 4 | import dask.array 5 | import numpy as np 6 | import pandas as pd 7 | import patsy 8 | 9 | from batchglm.utils.input import parse_design 10 | 11 | 12 | def check_np_dask(dmat: Union[np.ndarray, dask.array.core.Array], params: List[str]) -> bool: 13 | parse_design(design_matrix=dmat, param_names=params) 14 | try: # must produce ValueError 15 | parse_design(design_matrix=dmat, param_names=None) 16 | return False 17 | except ValueError as ve: 18 | if str(ve) != "Provide names when passing design_matrix as np.ndarray or dask.array.core.Array!": 19 | raise 20 | try: # must result in AssertionError 21 | parse_design(design_matrix=dmat, param_names=params[:-1]) 22 | return False 23 | except AssertionError as ae: 24 | if not ( 25 | str(ae) == "Length of provided 
param_names is not equal to number of coefficients in design_matrix." 26 | or str(ae).startswith("Datatype for design_matrix not understood") 27 | ): 28 | raise 29 | return True 30 | 31 | 32 | def check_pd_patsy(dmat: Union[pd.DataFrame, patsy.design_info.DesignMatrix], params: List[str]) -> bool: 33 | _, ret_params = parse_design(design_matrix=dmat, param_names=None) 34 | if ret_params != params: 35 | return False 36 | 37 | # generate new coefs to test ignoring passed params 38 | new_coef_list = ["a", "b", "c"] 39 | 40 | # param_names should be ignored 41 | _, ret_params = parse_design(design_matrix=dmat, param_names=new_coef_list) 42 | if params != ret_params: 43 | return False 44 | # param_names should be ignored 45 | _, ret_params = parse_design(design_matrix=dmat, param_names=new_coef_list[:-1]) 46 | if params != ret_params: 47 | return False 48 | return True 49 | 50 | 51 | class TestParseDesign(unittest.TestCase): 52 | """ 53 | Test various input data types for parsing of design and constraint matrices. 54 | The method "parse_design" in batchglm.utils.input must return Tuple[np.ndarray, List[str]]. 55 | It must fail if no param_names are passed or the length of param_names is not equal to the length of params. 56 | """ 57 | 58 | def test_parse_design(self) -> None: 59 | # create artificial data 60 | obs, coef = (500, 3) 61 | dmat = np.zeros(shape=(obs, coef)) 62 | coef_list = ["Intercept", "coef_0", "coef_1"] 63 | 64 | # assert on each check: unittest ignores plain return values, so returning False would never fail the test 65 | assert check_np_dask(dmat=dmat, params=coef_list) 66 | assert check_np_dask(dmat=dask.array.from_array(dmat, chunks=(1000, 1000)), params=coef_list) 67 | 68 | # check pd 69 | pd_coef = pd.DataFrame(dmat, columns=coef_list) 70 | assert check_pd_patsy(dmat=pd_coef, params=coef_list) 71 | 72 | # check patsy 73 | assert check_pd_patsy(dmat=patsy.dmatrix("~1 + coef_0 + coef_1", pd_coef), params=coef_list) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | --------------------------------------------------------------------------------