├── .all-contributorsrc ├── .codacy.yml ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md └── workflows │ ├── draft-pdf.yml │ ├── python_package_macos.yaml │ ├── python_package_ubuntu.yaml │ ├── python_package_windows.yaml │ └── python_publish.yaml ├── .gitignore ├── .prospector.yml ├── .readthedocs.yaml ├── CHANGELOG.rst ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.rst ├── CONTRIBUTORS.md ├── FDApy ├── __init__.py ├── misc │ ├── __init__.py │ ├── basis.py │ ├── loader.py │ └── utils.py ├── preprocessing │ ├── __init__.py │ ├── dim_reduction │ │ ├── __init__.py │ │ ├── fcp_tpa.py │ │ ├── mfpca.py │ │ └── ufpca.py │ └── smoothing │ │ ├── __init__.py │ │ ├── local_polynomial.py │ │ └── psplines.py ├── py.typed ├── representation │ ├── __init__.py │ ├── argvals.py │ ├── basis.py │ ├── functional_data.py │ └── values.py ├── simulation │ ├── __init__.py │ ├── brownian.py │ ├── datasets.py │ ├── karhunen.py │ └── simulation.py └── visualization │ ├── __init__.py │ └── _plot.py ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── _templates │ └── autosummary │ │ ├── base.rst │ │ └── class.rst ├── conf.py ├── index.rst ├── make.bat ├── modules.rst ├── modules │ ├── misc.rst │ ├── preprocessing.rst │ ├── representation.rst │ └── simulation.rst ├── refs.bib └── sg_execution_times.rst ├── examples ├── README.rst ├── basis │ ├── README.rst │ ├── plot_basis_1d.py │ ├── plot_basis_2d.py │ └── plot_basis_multivariate_1d.py ├── data │ ├── canadian_precipitation_monthly.csv │ ├── canadian_temperature_daily.csv │ └── cd4.csv ├── data_analysis │ ├── README.rst │ ├── plot_canadian_weather.py │ └── plot_cd4.py ├── fpca │ ├── README.rst │ ├── plot_fpca_1d.py │ ├── plot_fpca_1d_sparse.py │ ├── plot_fpca_2d.py │ ├── plot_mfpca_1d.py │ ├── plot_mfpca_1d_2d.py │ ├── plot_mfpca_1d_sparse.py │ └── plot_mfpca_2d.py ├── misc │ ├── README.rst │ ├── 
plot_local_polynomials_1d.py │ ├── plot_local_polynomials_2d.py │ ├── plot_psplines_1d.py │ └── plot_psplines_2d.py ├── representation │ ├── README.rst │ ├── plot_basis_functional.py │ ├── plot_dense_functional.py │ ├── plot_irregular_functional.py │ └── plot_multivariate_functional.py ├── simulation │ ├── README.rst │ ├── plot_brownian.py │ ├── plot_cluster.py │ ├── plot_cluster_multivariate.py │ ├── plot_karhunen.py │ ├── plot_karhunen_multivariate.py │ └── plot_simulation.py └── smoothing │ ├── README.rst │ ├── plot_smooth_data_1d.py │ └── plot_smooth_data_2d.py ├── joss ├── paper.bib └── paper.md ├── pyproject.toml ├── setup.cfg └── tests ├── __init__.py ├── data ├── basis_2_1D.pickle ├── basis_2_2D.pickle ├── basis_multi_3_1D.pickle ├── data_noisy_5_100_005.pickle ├── data_noisy_5_100_005_2D.pickle ├── data_noisy_5_10_001.pickle ├── data_noisy_5_10_001_2D.pickle ├── data_noisy_5_1_001_2D.pickle ├── data_noisy_5_1_005.pickle ├── data_sparse_5_100_08.pickle ├── data_sparse_5_10_08.pickle └── data_sparse_5_1_08.pickle ├── test_argvals.py ├── test_basis.py ├── test_basis_functional_data.py ├── test_brownian.py ├── test_checker_functional_data.py ├── test_datasets.py ├── test_dense_functional_data.py ├── test_fcp_tpa.py ├── test_fpca.py ├── test_functional_data.py ├── test_irregular_functional_data.py ├── test_karhunen.py ├── test_loader.py ├── test_local_polynomials.py ├── test_mfpca.py ├── test_multivariate_functional_data.py ├── test_plot.py ├── test_psplines.py ├── test_simulation.py ├── test_ufpca.py ├── test_utils.py └── test_values.py /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "projectName": "FDApy", 3 | "projectOwner": "StevenGolovkine", 4 | "files": [ 5 | "CONTRIBUTORS.md" 6 | ], 7 | "commitType": "docs", 8 | "commitConvention": "angular", 9 | "contributorsPerLine": 7, 10 | "contributors": [ 11 | { 12 | "login": "StevenGolovkine", 13 | "name": "Steven", 14 | "avatar_url": 
"https://avatars.githubusercontent.com/u/22517192?v=4", 15 | "profile": "https://github.com/StevenGolovkine", 16 | "contributions": [ 17 | "code" 18 | ] 19 | }, 20 | { 21 | "login": "edwardgunning", 22 | "name": "Edward Gunning", 23 | "avatar_url": "https://avatars.githubusercontent.com/u/56870103?v=4", 24 | "profile": "https://edwardgunning.github.io/", 25 | "contributions": [ 26 | "research" 27 | ] 28 | }, 29 | { 30 | "login": "mstimberg", 31 | "name": "Marcel Stimberg", 32 | "avatar_url": "https://avatars.githubusercontent.com/u/1381982?v=4", 33 | "profile": "https://marcel.stimberg.info", 34 | "contributions": [ 35 | "review" 36 | ] 37 | }, 38 | { 39 | "login": "vnmabus", 40 | "name": "Carlos Ramos Carreño", 41 | "avatar_url": "https://avatars.githubusercontent.com/u/2364173?v=4", 42 | "profile": "https://github.com/vnmabus", 43 | "contributions": [ 44 | "review" 45 | ] 46 | }, 47 | { 48 | "login": "quantgirluk", 49 | "name": "Quant Girl", 50 | "avatar_url": "https://avatars.githubusercontent.com/u/46248141?v=4", 51 | "profile": "https://quantgirl.blog/", 52 | "contributions": [ 53 | "review" 54 | ] 55 | } 56 | ] 57 | } 58 | -------------------------------------------------------------------------------- /.codacy.yml: -------------------------------------------------------------------------------- 1 | # Python for Codacy 2 | 3 | --- 4 | 5 | exclude_paths: 6 | - 'examples/**' 7 | - 'tests/**' 8 | - 'docs/**' 9 | - '.github/**' 10 | - 'joss/**' 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | if TYPE_CHECKING: -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us 
improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Load '...' 16 | 2. Run '....' 17 | 3. See error 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Desktop (please complete the following information):** 26 | - OS: [e.g. Windows, macos] 27 | - Python Version [e.g. 3.10.16] 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | ## References to issues or other PRs 8 | 18 | 19 | 20 | ## Describe the proposed changes 21 | 22 | 23 | ## Additional information 24 | 25 | 26 | ## Checklist before requesting a review 27 | 28 | - [ ] I have performed a self-review of my code 29 | - [ ] The code conforms to the style used in this package 30 | - [ ] The code is fully documented and typed (type-checked with [Mypy](https://mypy-lang.org/)) 31 | - [ ] I have added thorough tests for the new/changed functionality 32 | -------------------------------------------------------------------------------- /.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | name: Draft PDF 2 | on: [push] 3 | 4 | jobs: 5 | paper: 6 | runs-on: ubuntu-latest 7 | name: Paper Draft 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v4 11 | - name: Build draft PDF 12 | uses: openjournals/openjournals-draft-action@master 13 | with: 14 | journal: joss 15 | # This should be the path to the paper within your repo. 16 | paper-path: ./joss/paper.md 17 | - name: Upload 18 | uses: actions/upload-artifact@v4 19 | with: 20 | name: paper 21 | # This is the output path where Pandoc will write the compiled 22 | # PDF. 
Note, this should be the same directory as the input 23 | # paper.md 24 | path: ./joss/paper.pdf -------------------------------------------------------------------------------- /.github/workflows/python_package_macos.yaml: -------------------------------------------------------------------------------- 1 | name: Build MacOS 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: macos-latest 9 | name: Python ${{ matrix.python-version }} 10 | strategy: 11 | matrix: 12 | python-version: ["3.10", "3.11"] 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install pytest-cov || pip install --user pytest-cov; 26 | 27 | - name: Lint with flake8 28 | run: | 29 | pip install flake8 30 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 31 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics 32 | 33 | - name: Test with pytest 34 | run: | 35 | pip debug --verbose . 
36 | pip install ".[test]" 37 | pytest --cov=FDApy/ --cov-report=xml 38 | -------------------------------------------------------------------------------- /.github/workflows/python_package_ubuntu.yaml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | name: Python ${{ matrix.python-version }} 10 | strategy: 11 | matrix: 12 | python-version: ["3.10", "3.11"] 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install pytest-cov || pip install --user pytest-cov; 26 | 27 | - name: Lint with flake8 28 | run: | 29 | pip install flake8 30 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 31 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics 32 | 33 | - name: Run tests 34 | run: | 35 | pip debug --verbose . 
36 | pip install numba 37 | pip install ".[test]" 38 | pytest --cov=FDApy/ 39 | 40 | - name: Upload coverage to Codecov 41 | uses: codecov/codecov-action@v4.0.1 42 | with: 43 | token: ${{ secrets.CODECOV_TOKEN }} 44 | -------------------------------------------------------------------------------- /.github/workflows/python_package_windows.yaml: -------------------------------------------------------------------------------- 1 | name: Build Windows 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: windows-latest 9 | name: Python ${{ matrix.python-version }} 10 | strategy: 11 | matrix: 12 | python-version: ["3.10", "3.11"] 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install pytest-cov || pip install --user pytest-cov; 26 | 27 | - name: Lint with flake8 28 | run: | 29 | pip install flake8 30 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 31 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics 32 | 33 | - name: Test with pytest 34 | run: | 35 | pip debug --verbose . 36 | pip install ".[test]" 37 | pytest --cov=FDApy/ --cov-report=xml 38 | -------------------------------------------------------------------------------- /.github/workflows/python_publish.yaml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 
8 | # You can also reference a tag or branch, but the action may change without warning. 9 | 10 | name: Upload Python Package 11 | 12 | on: 13 | release: 14 | types: [published] 15 | 16 | jobs: 17 | deploy: 18 | 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: '3.x' 28 | 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | 34 | - name: Build package 35 | run: python -m build 36 | 37 | - name: Publish package 38 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 39 | with: 40 | user: __token__ 41 | password: ${{ secrets.PYPI_API_TOKEN }} 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore 2 | 3 | # Sublime text project 4 | FDApy.sublime-project 5 | FDApy.sublime-workspace 6 | 7 | # Visual code project 8 | FDApy.code-workspace 9 | .vscode 10 | 11 | # Compiled python modules 12 | *.pyc 13 | 14 | # Setuptools distribution folder 15 | /dist/ 16 | /build/ 17 | 18 | # Python egg metadata, regenerated from source files by setuptools. 
19 | /*.egg-info 20 | 21 | # Sublime text files 22 | *.sublime-project 23 | *.sublime-workspace 24 | 25 | # Jupyter files 26 | .ipynb_checkpoints/* 27 | examples/.ipynb_checkpoints/* 28 | FDApy/.ipynb_checkpoints/* 29 | notebooks/* 30 | 31 | # Some data files 32 | *.rda 33 | *.rds 34 | 35 | # Tox 36 | .tox/* 37 | 38 | # Egg files 39 | .egg/* 40 | .eggs/* 41 | 42 | # Build docs files 43 | docs/_build/* 44 | docs/_static/* 45 | docs/auto_examples/* 46 | docs/backreferences/* 47 | docs/modules/autosummary/* 48 | 49 | # Test files 50 | .junit/* 51 | .pytest_cache/* 52 | tests/__pycache__/* 53 | 54 | # Coverage files 55 | htmlcov/* 56 | .coverage 57 | 58 | # Mac files 59 | .DS_Store 60 | -------------------------------------------------------------------------------- /.prospector.yml: -------------------------------------------------------------------------------- 1 | strictness: veryhigh 2 | max-line-length: 88 3 | doc-warnings: yes 4 | 5 | ignore-paths: 6 | - .github 7 | - build 8 | - docs 9 | - examples 10 | - tests 11 | 12 | requirements: 13 | - requirements.txt 14 | 15 | pylint: 16 | run: false 17 | 18 | pyflakes: 19 | run: false 20 | 21 | mccabe: 22 | run: false 23 | 24 | pep8: 25 | run: true 26 | disable: 27 | - E203 28 | 29 | pep257: 30 | run: true 31 | 32 | pydocstyle: 33 | run: true 34 | convention: numpy 35 | disable: 36 | # Whitespace issues 37 | - D203 38 | - D205 39 | - D212 40 | - D213 41 | # Docstring content issues 42 | - D400 43 | - D415 -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.10" 13 | # You can also specify other 
tool versions: 14 | # nodejs: "16" 15 | # rust: "1.55" 16 | # golang: "1.17" 17 | apt_packages: 18 | - cmake 19 | - gcc 20 | 21 | # Build documentation in the docs/ directory with Sphinx 22 | sphinx: 23 | builder: html 24 | configuration: docs/conf.py 25 | 26 | # If using Sphinx, optionally build your docs in additional formats such as PDF 27 | # formats: 28 | # - pdf 29 | 30 | # Optionally declare the Python requirements required to build your docs 31 | python: 32 | install: 33 | - method: pip 34 | path: . 35 | extra_requirements: 36 | - docs 37 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Changelog 3 | ========= 4 | 5 | 6 | Version 1.0.3 (2025-02-25) 7 | ========================== 8 | 9 | - JOSS paper 10 | 11 | Version 1.0.2 (2024-09-24) 12 | ========================== 13 | 14 | - Add P-splines smoothing method. 15 | - Add basis representation for functional data. 16 | 17 | Version 1.0.0 (2023-12-29) 18 | ========================== 19 | 20 | - Initial release. 21 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Golovkine" 5 | given-names: "Steven" 6 | orcid: "https://orcid.org/0000-0002-5994-2671" 7 | affiliation: "University of Limerick" 8 | email: steven_golovkine@icloud.com 9 | title: "StevenGolovkine/FDApy: a Python package for functional data" 10 | doi: 10.5281/zenodo.3891521 11 | date-released: 2025-02-28 12 | url: "https://github.com/StevenGolovkine/FDApy" 13 | license: MIT 14 | keywords: 15 | - functional data analysis 16 | - Python 17 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | steven_golovkine@icloud.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every little bit 8 | helps, and credit will always be given. 
9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/StevenGolovkine/FDApy/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 30 | wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" and "help wanted" is open to whoever wants to implement it. 36 | 37 | Write Documentation 38 | ~~~~~~~~~~~~~~~~~~~ 39 | 40 | *FDApy* could always use more documentation, whether as part of the official *FDApy* docs, in docstrings, or even on the web in blog posts, 41 | articles, and such. 42 | 43 | Submit Feedback 44 | ~~~~~~~~~~~~~~~ 45 | 46 | The best way to send feedback is to file an issue at https://github.com/StevenGolovkine/FDApy/issues. 47 | 48 | If you are proposing a feature: 49 | 50 | * Explain in detail how it would work. 51 | * Keep the scope as narrow as possible, to make it easier to implement. 52 | * Remember that this is a volunteer-driven project, and that contributions 53 | are welcome :) 54 | 55 | Get Started! 56 | ------------ 57 | 58 | Ready to contribute? Here's how to set up `FDApy` for local development. 59 | 60 | 1. Fork the `FDApy` repo on GitHub. 61 | 2. Clone your fork locally:: 62 | 63 | git clone https://github.com/your_name_here/FDApy 64 | 65 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 66 | 67 | mkvirtualenv FDApy 68 | cd FDApy/ 69 | pip install -e . 70 | 71 | 4. 
Create a branch for local development:: 72 | 73 | git checkout -b name-of-your-bugfix-or-feature 74 | 75 | Now you can make your changes locally. 76 | 77 | 5. When you're done making changes, check that your changes pass flake8 and the tests:: 78 | 79 | flake8 FDApy tests 80 | pytest 81 | 82 | To get flake8, just pip install it into your virtualenv. 83 | 84 | 6. Commit your changes and push your branch to GitHub:: 85 | 86 | git add . 87 | git commit -m "Your detailed description of your changes." 88 | git push origin name-of-your-bugfix-or-feature 89 | 90 | 7. Submit a pull request through the GitHub website. 91 | 92 | Pull Request Guidelines 93 | ----------------------- 94 | 95 | Before you submit a pull request, check that it meets these guidelines: 96 | 97 | 1. The pull request should include tests. 98 | 2. If the pull request adds functionality, the docs should be updated. Put 99 | your new functionality into a function with a docstring, and add the 100 | feature to the list in docs/index.rst. 101 | 3. The pull request should work for Python 3.10 and above. 102 | 103 | Git rebase 104 | ---------- 105 | 106 | The `git rebase` command should be use with caution. It is fine to use it for your own branches, but it should not be used for branches that are shared with others. Once a branch has been pushed to the repository, it should not be rebased. 107 | 108 | Tips 109 | ---- 110 | 111 | To run a subset of tests:: 112 | 113 | 114 | python -m unittest tests.test_basis 115 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | ## Contributors 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
Steven
Steven

💻
Edward Gunning
Edward Gunning

🔬
Marcel Stimberg
Marcel Stimberg

👀
Quant Girl
Quant Girl

👀
Carlos Ramos Carreño
Carlos Ramos Carreño

👀
18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /FDApy/__init__.py: -------------------------------------------------------------------------------- 1 | """FDApy package.""" 2 | 3 | import lazy_loader as lazy 4 | 5 | __getattr__, __dir__, __all__ = lazy.attach( 6 | __name__, 7 | submodules=[ 8 | "clustering", 9 | "misc", 10 | "preprocessing", 11 | "regression", 12 | "representation", 13 | "simulation", 14 | "visualization", 15 | ], 16 | submod_attrs={ 17 | "representation": [ 18 | "DenseFunctionalData", 19 | "IrregularFunctionalData", 20 | "MultivariateFunctionalData", 21 | ], 22 | "misc": ["read_csv"], 23 | }, 24 | ) 25 | 26 | __version__ = "1.0.3" 27 | -------------------------------------------------------------------------------- /FDApy/misc/__init__.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous module.""" 2 | 3 | import lazy_loader as lazy 4 | 5 | __getattr__, __dir__, __all__ = lazy.attach( 6 | __name__, 7 | submodules=["loader", "utils"], 8 | submod_attrs={"loader": ["read_csv"]}, 9 | ) 10 | -------------------------------------------------------------------------------- /FDApy/misc/loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*-coding:utf8 -* 3 | 4 | """ 5 | Loaders 6 | ------- 7 | 8 | """ 9 | import numpy as np 10 | import numpy.typing as npt 11 | import pandas as pd 12 | 13 | from ..representation.argvals import DenseArgvals, IrregularArgvals 14 | from ..representation.values import DenseValues, IrregularValues 15 | from ..representation.functional_data import ( 16 | DenseFunctionalData, 17 | IrregularFunctionalData, 18 | ) 19 | 20 | 21 | ############################################################################### 22 | # Loader for csv 23 | def read_csv(filepath: str, **kwargs) -> DenseFunctionalData | IrregularFunctionalData: 24 | """Load CSV file into 
functional data object. 25 | 26 | Build a DenseFunctionalData or IrregularFunctionalData object upon a CSV 27 | file passed as parameter. If the CSV file does not contain any `NA` values, the 28 | data will be loaded as a DenseFunctionalData object. Otherwise, it will be loaded 29 | as an IrregularFunctionalData object. See the `Canadian Weather example 30 | <../../auto_examples/data_analysis/plot_canadian_weather.html>`_ and 31 | `CD4 example <../../auto_examples/data_analysis/plot_cd4.html>`_ for the formating 32 | of the CSV file. 33 | 34 | 35 | Notes 36 | ----- 37 | We assumed that the data are unidimensional without check. 38 | 39 | Parameters 40 | ---------- 41 | filepath 42 | Any valid string path is acceptable. 43 | kwargs 44 | Keywords arguments to passed to the pd.read_csv function. 45 | 46 | Returns 47 | ------- 48 | DenseFunctionalData | IrregularFunctionalData 49 | The loaded CSV file. 50 | 51 | """ 52 | data = pd.read_csv(filepath, **kwargs) 53 | 54 | try: 55 | all_argvals = data.columns.astype(np.int64).to_numpy() 56 | except ValueError: 57 | all_argvals = np.arange(0, len(data.columns)) 58 | 59 | if not data.isna().values.any(): 60 | return _read_csv_dense(data, all_argvals) 61 | else: 62 | return _read_csv_irregular(data, all_argvals) 63 | 64 | 65 | def _read_csv_dense( 66 | data: pd.DataFrame, argvals: npt.NDArray[np.float64] 67 | ) -> DenseFunctionalData: 68 | """Load a csv file into a DenseFunctionalData object. 69 | 70 | Parameters 71 | ---------- 72 | data 73 | Input dataframe. 74 | argvals 75 | An array of argvals. 
def _read_csv_irregular(
    data: pd.DataFrame, argvals: npt.NDArray[np.float64]
) -> IrregularFunctionalData:
    """Load a csv file into an IrregularFunctionalData object.

    Parameters
    ----------
    data
        Input dataframe.
    argvals
        An array of argvals.

    Returns
    -------
    IrregularFunctionalData
        The loaded csv file.

    """
    argvals_per_obs = {}
    values_per_obs = {}
    for obs_id, row in enumerate(data.values):
        # Keep only the sampling points where a value is actually observed.
        observed = ~np.isnan(row)
        argvals_per_obs[obs_id] = DenseArgvals({"input_dim_0": argvals[observed]})
        values_per_obs[obs_id] = row[observed]
    return IrregularFunctionalData(
        IrregularArgvals(argvals_per_obs), IrregularValues(values_per_obs)
    )
-------------------------------------------------------------------------------- /FDApy/preprocessing/smoothing/__init__.py: -------------------------------------------------------------------------------- 1 | """Smoothing module.""" 2 | 3 | import lazy_loader as lazy 4 | 5 | __getattr__, __dir__, __all__ = lazy.attach( 6 | __name__, 7 | submodules=["local_polynomial", "psplines"], 8 | submod_attrs={"local_polynomial": ["LocalPolynomial"], "psplines": ["PSplines"]}, 9 | ) 10 | -------------------------------------------------------------------------------- /FDApy/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/FDApy/py.typed -------------------------------------------------------------------------------- /FDApy/representation/__init__.py: -------------------------------------------------------------------------------- 1 | """Representation module.""" 2 | 3 | import lazy_loader as lazy 4 | 5 | __getattr__, __dir__, __all__ = lazy.attach( 6 | __name__, 7 | submodules=["argvals", "basis", "functional_data", "values"], 8 | submod_attrs={ 9 | "functional_data": [ 10 | "FunctionalData", 11 | "GridFunctionalData", 12 | "BasisFunctionalData", 13 | "DenseFunctionalData", 14 | "IrregularFunctionalData", 15 | "MultivariateFunctionalData", 16 | "DenseFunctionalDataIterator", 17 | "IrregularFunctionalDataIterator", 18 | "BasisFunctionalDataIterator", 19 | ], 20 | "argvals": ["Argvals", "DenseArgvals", "IrregularArgvals"], 21 | "values": ["Values", "DenseValues", "IrregularValues"], 22 | "basis": ["Basis", "MultivariateBasis"], 23 | }, 24 | ) 25 | -------------------------------------------------------------------------------- /FDApy/simulation/__init__.py: -------------------------------------------------------------------------------- 1 | """Simulation module.""" 2 | 3 | import lazy_loader as lazy 4 | 5 | __getattr__, __dir__, __all__ = 
lazy.attach( 6 | __name__, 7 | submodules=["brownian", "datasets", "karhunen", "simulation"], 8 | submod_attrs={ 9 | "simulation": ["Simulation"], 10 | "brownian": ["Brownian"], 11 | "datasets": ["Datasets"], 12 | "karhunen": ["KarhunenLoeve"], 13 | }, 14 | ) 15 | -------------------------------------------------------------------------------- /FDApy/simulation/datasets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*-coding:utf8 -* 3 | 4 | """ 5 | Datasets 6 | -------- 7 | 8 | """ 9 | import numpy as np 10 | import numpy.typing as npt 11 | 12 | from typing import Callable 13 | 14 | from ..representation.argvals import DenseArgvals 15 | from ..representation.values import DenseValues 16 | from ..representation.functional_data import DenseFunctionalData 17 | from .simulation import Simulation 18 | 19 | 20 | ############################################################################# 21 | # Definition of the simulation settings 22 | 23 | 24 | def _zhang_chen( 25 | n_obs: int, argvals: npt.NDArray[np.float64], rnorm: Callable = np.random.normal 26 | ) -> npt.NDArray[np.float64]: 27 | """Define a simulation from Zhang and Chen (2007). 28 | 29 | This function reproduces simulation in [1]_. 30 | 31 | References 32 | ---------- 33 | .. [1] Zhang, J.-T. and Chen J. (2007), Statistical Inferences for 34 | Functional Data, The Annals of Statistics, Vol. 35, No. 3. 
35 | 36 | """ 37 | cos = np.cos(2 * np.pi * argvals) 38 | sin = np.sin(2 * np.pi * argvals) 39 | 40 | mu = 1.2 + 2.3 * cos + 4.2 * sin 41 | 42 | results = np.zeros((n_obs, len(argvals))) 43 | for idx in np.arange(n_obs): 44 | coefs = rnorm(0, (1, np.sqrt(2), np.sqrt(3))) 45 | vi = coefs[0] + coefs[1] * cos + coefs[2] * sin 46 | eps = rnorm(0, np.sqrt(0.1 * (1 + argvals))) 47 | results[idx, :] = mu + vi + eps 48 | return results 49 | 50 | 51 | ############################################################################# 52 | # Definition of the Datasets simulation 53 | 54 | 55 | class Datasets(Simulation): 56 | r"""Simulate published paper datasets. 57 | 58 | Parameters 59 | ---------- 60 | basis_name: str 61 | Name of the datasets to simulate. 62 | 63 | Attributes 64 | ---------- 65 | data 66 | An object that represents the simulated data. 67 | noisy_data 68 | An object that represents a noisy version of the simulated data. 69 | sparse_data 70 | An object that represents a sparse version of the simulated data. 71 | 72 | """ 73 | 74 | def __init__(self, basis_name: str, random_state: int | None = None) -> None: 75 | """Initialize Datasets object.""" 76 | super().__init__(basis_name, random_state) 77 | 78 | def new( 79 | self, 80 | n_obs: int, 81 | n_clusters: int = 1, 82 | argvals: npt.NDArray[np.float64] | None = None, 83 | **kwargs, 84 | ) -> None: 85 | """Simulate realizations of the Datasets. 86 | 87 | This function generates ``n_obs`` realizations of the Datasets object. 88 | 89 | Parameters 90 | ---------- 91 | n_obs 92 | Number of observations to simulate. 93 | n_clusters 94 | Not used in this context. 95 | argvals 96 | Not used in this context. We will use the ``argvals`` from the 97 | :mod:`Basis` object as ``argvals`` of the simulation. Here to be 98 | compliant with the class :mod:`Simulation`. 99 | 100 | Returns 101 | ------- 102 | None 103 | Create the class attributes `data`. 
104 | 105 | """ 106 | if self.basis_name == "zhang_chen": 107 | self.data = DenseFunctionalData( 108 | argvals=DenseArgvals({"input_dim_0": argvals}), 109 | values=DenseValues(_zhang_chen(n_obs=n_obs, argvals=argvals)), 110 | ) 111 | else: 112 | raise NotImplementedError 113 | -------------------------------------------------------------------------------- /FDApy/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | """Visualization module.""" 2 | 3 | import lazy_loader as lazy 4 | 5 | __getattr__, __dir__, __all__ = lazy.attach( 6 | __name__, 7 | submodules=["_plot"], 8 | submod_attrs={"_plot": ["plot", "plot_multivariate"]}, 9 | ) 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Steven Golovkine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | =================================================== 3 | FDApy: a Python package to analyze functional data 4 | =================================================== 5 | 6 | .. image:: https://img.shields.io/pypi/pyversions/FDApy 7 | :target: https://pypi.org/project/FDApy/ 8 | :alt: PyPI - Python Version 9 | 10 | .. image:: https://img.shields.io/pypi/v/FDApy 11 | :target: https://pypi.org/project/FDApy/ 12 | :alt: PyPI 13 | 14 | .. image:: https://github.com/StevenGolovkine/FDApy/actions/workflows/python_package_ubuntu.yaml/badge.svg 15 | :target: https://github.com/StevenGolovkine/FDApy/actions 16 | :alt: Github - Workflow 17 | 18 | .. image:: https://img.shields.io/badge/License-MIT-blue.svg 19 | :target: https://raw.githubusercontent.com/StevenGolovkine/FDApy/master/LICENSE 20 | :alt: PyPI - License 21 | 22 | .. image:: https://codecov.io/gh/StevenGolovkine/FDApy/branch/master/graph/badge.svg?token=S2H0D3QQMR 23 | :target: https://codecov.io/gh/StevenGolovkine/FDApy 24 | :alt: Coverage 25 | 26 | .. image:: https://app.codacy.com/project/badge/Grade/3d9062cffc304ad4bb7c76bf97cc965c 27 | :target: https://app.codacy.com/gh/StevenGolovkine/FDApy/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade 28 | :alt: Code Quality 29 | 30 | .. 
image:: https://readthedocs.org/projects/fdapy/badge/?version=latest 31 | :target: https://fdapy.readthedocs.io/en/latest/?badge=latest 32 | :alt: Documentation Status 33 | 34 | .. image:: https://joss.theoj.org/papers/10.21105/joss.07526/status.svg 35 | :target: https://doi.org/10.21105/joss.07526 36 | :alt: JOSS 37 | 38 | .. image:: https://zenodo.org/badge/155183454.svg 39 | :target: https://zenodo.org/badge/latestdoi/155183454 40 | :alt: DOI 41 | 42 | .. image:: https://img.shields.io/github/all-contributors/StevenGolovkine/FDApy?color=ee8449&style=flat-square 43 | :target: https://github.com/StevenGolovkine/FDApy/blob/master/CONTRIBUTORS.md 44 | :alt: Contributors 45 | 46 | 47 | Description 48 | =========== 49 | 50 | Functional data analysis (FDA) is a statistical methodology for analyzing data that can be characterized as functions. These functions could represent measurements taken over time, space, frequency, probability, etc. The goal of FDA is to extract meaningful information from these functions and to model their behavior. 51 | 52 | The package aims to provide functionalities for creating and manipulating general functional data objects. It thus supports the analysis of various types of functional data, whether densely or irregularly sampled, multivariate, or multidimensional. Functional data can be represented over a grid of points or using a basis of functions. *FDApy* implements dimension reduction techniques and smoothing methods, facilitating the extraction of patterns from complex functional datasets. A large simulation toolbox, based on basis decomposition, is provided. It allows to configure parameters for simulating different clusters within the data. Finally, some visualization tools are also available. 53 | 54 | Check out the `examples `_ for an overview of the package functionalities. 55 | 56 | Check out the `API reference `_ for an exhaustive list of the available features within the package. 
The documentation is available `here `__, which includes detailed information about API references and several examples presenting the different functionalities.
code-block:: 115 | 116 | @article{golovkine_2024_fdapy_paper, 117 | title = {{{FDApy}}: A {{Python}} Package for Functional Data}, 118 | author = {Golovkine, Steven}, 119 | date = {2025-03-04}, 120 | journaltitle = {Journal of Open Source Software}, 121 | volume = {10}, 122 | number = {107}, 123 | pages = {7526}, 124 | issn = {2475-9066}, 125 | doi = {10.21105/joss.07526}, 126 | url = {https://joss.theoj.org/papers/10.21105/joss.07526} 127 | } 128 | 129 | 130 | Contributing 131 | ============ 132 | 133 | Contributions are welcome, and they are greatly appreciated! Every little bit 134 | helps, and credit will always be given. Contributing guidelines are provided `here `_. The people involved in the development of the package can be found in the `contributors page `_. 135 | 136 | License 137 | ======= 138 | 139 | The package is licensed under the MIT License. A copy of the `license `_ can be found along with the code. 140 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_templates/autosummary/base.rst: -------------------------------------------------------------------------------- 1 | {{ objname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. auto{{ objtype }}:: {{ objname }} 6 | 7 | .. include:: {{package}}/backreferences/{{fullname}}.examples 8 | -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ objname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autoclass:: {{ objname }} 6 | 7 | {% block methods %} 8 | {% if methods %} 9 | .. rubric:: Methods 10 | 11 | .. autosummary:: 12 | {% for item in methods %} 13 | {% if item != "__init__" %} 14 | ~{{ name }}.{{ item }} 15 | {% endif %} 16 | {%- endfor %} 17 | {% endif %} 18 | 19 | {% for item in methods %} 20 | {% if item != "__init__" %} 21 | .. automethod:: {{ item }} 22 | {% endif %} 23 | {%- endfor %} 24 | {% endblock %} 25 | 26 | .. include:: {{package}}/backreferences/{{fullname}}.examples -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. FDApy documentation master file, created by 2 | sphinx-quickstart on Tue Jun 9 11:47:19 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | FDApy: a Python package to analyze functional data 7 | ================================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | :titlesonly: 12 | :hidden: 13 | 14 | auto_examples/index 15 | 16 | 17 | .. 
Up to now, *FDApy* is available in Python 3.10 on any Linux platform. The stable version can be installed via `PyPI `_:
code:: bash 52 | 53 | git clone https://github.com/StevenGolovkine/FDApy.git 54 | pip install ./FDApy 55 | 56 | Requirements 57 | ------------ 58 | 59 | *FDApy* depends on the following packages: 60 | 61 | * `lazy_loader `_ - A loader for Python submodules 62 | * `matplotlib `_ - Plotting with Python 63 | * `numpy `_ (< 2.0.0) - The fundamental package for scientific computing with Python 64 | * `pandas `_ (>= 2.0.0)- Powerful Python data analysis toolkit 65 | * `scikit-learn `_ (>= 1.2.0)- Machine learning in Python 66 | * `scipy `_ (>= 1.10.0) - Scientific computation in Python 67 | 68 | Contributing 69 | ============ 70 | 71 | Contributions are welcome, and they are greatly appreciated! Every little bit 72 | helps, and credit will always be given. Contributing guidelines are provided `here `_. The people involved in the development of the package can be found in the `contributors page `_. 73 | 74 | License 75 | ======= 76 | 77 | The package is licensed under the MIT License. A copy of the `license `_ can be found along with the code. 78 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | API References 2 | ============== 3 | 4 | .. toctree:: 5 | :includehidden: 6 | :maxdepth: 2 7 | :caption: Modules 8 | :titlesonly: 9 | 10 | modules/representation 11 | modules/preprocessing 12 | modules/simulation 13 | modules/misc 14 | -------------------------------------------------------------------------------- /docs/modules/misc.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Miscellaneous 3 | ============= 4 | 5 | The package provides several functionalities to facilitate the use of functional data analysis. 6 | 7 | 8 | Visualization 9 | ============= 10 | 11 | Basic plots are provided to visualize univariate and multivariate functional data. 12 | 13 | .. autosummary:: 14 | :toctree: autosummary 15 | 16 | FDApy.visualization.plot 17 | FDApy.visualization.plot_multivariate 18 | 19 | 20 | Loader 21 | ====== 22 | 23 | The package provides a loader to read CSV files into functional data objects. 24 | 25 | .. autosummary:: 26 | :toctree: autosummary 27 | 28 | FDApy.misc.read_csv 29 | -------------------------------------------------------------------------------- /docs/modules/preprocessing.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Preprocessing 3 | ============= 4 | 5 | Before the analysis, functional data need to be preprocessed. The package provides several classes to preprocess functional data, including smoothing and dimension reduction. 
The first step in the analysis of functional data is to represent them in a suitable way.
The third one, :class:`FunctionalData`, represents the functional data object, which is a pair of :class:`Argvals` and :class:`Values`. The package provides several implementations of these classes, depending on the type of functional data to represent. These classes cannot be instantiated directly, but they are used as base classes for the specific implementations. 12 | 13 | .. autosummary:: 14 | :toctree: autosummary 15 | 16 | FDApy.representation.Argvals 17 | FDApy.representation.Values 18 | FDApy.representation.FunctionalData 19 | 20 | 21 | Representing Argvals and Values 22 | =============================== 23 | 24 | Functional data representations are based on two main components: the arguments of the functions and the values of the functions. The package provides several classes to represent these components, depending on the type of data to represent. 25 | 26 | .. autosummary:: 27 | :toctree: autosummary 28 | 29 | FDApy.representation.DenseArgvals 30 | FDApy.representation.DenseValues 31 | 32 | 33 | .. autosummary:: 34 | :toctree: autosummary 35 | 36 | FDApy.representation.IrregularArgvals 37 | FDApy.representation.IrregularValues 38 | 39 | 40 | Univariate Functional Data 41 | ========================== 42 | 43 | Univariate functional data are realizations of a random process: 44 | 45 | .. math:: 46 | X: \mathcal{T} \subset \mathbb{R}^d \rightarrow \mathbb{R}. 47 | 48 | The package provides two representations of univariate functional data: grid representation and basis representation. 49 | 50 | 51 | Grid representation 52 | ------------------- 53 | 54 | Univariate functional data can be represented as a set of values on a grid. The :class:`GridFunctionalData` class is the abstract class to represent univariate functional data on a grid. The package provides two implementations of this class: :class:`DenseFunctionalData` and :class:`IrregularFunctionalData`. 
The class :class:`DenseFunctionalData` represents functional data of arbitrary dimension (one for curves, two for images, etc.) on a common set of sampling points, while the class :class:`IrregularFunctionalData` represents functional data of arbitrary dimension sampled on different sets of points (the number and location of the sampling points vary between functional observations). 55 | 56 | 57 | .. autosummary:: 58 | :toctree: autosummary 59 | 60 | FDApy.representation.GridFunctionalData 61 | FDApy.representation.DenseFunctionalData 62 | FDApy.representation.IrregularFunctionalData 63 | 64 | 65 | Basis representation 66 | -------------------- 67 | 68 | The basis representation of univariate functional data consists of a linear combination of basis functions. 69 | 70 | .. autosummary:: 71 | :toctree: autosummary 72 | 73 | FDApy.representation.BasisFunctionalData 74 | 75 | 76 | Multivariate Functional Data 77 | ============================ 78 | 79 | Multivariate functional data are realizations of a multivariate random process. Multivariate functional data objects are vectors of univariate functional data objects, eventually defined on different domains. The class :class:`MultivariateFunctionalData` allows for the combination of different types of functional data objects (:class:`DenseFunctionalData`, :class:`IrregularFunctionalData`, and :class:`BasisFunctionalData`). It is also possible to mix unidimensional data (curves) with multidimensional data (images, surfaces, etc.). 80 | 81 | .. autosummary:: 82 | :toctree: autosummary 83 | 84 | FDApy.representation.MultivariateFunctionalData 85 | 86 | 87 | Basis 88 | ===== 89 | 90 | The package provides two classes to represent basis of functions. The class :class:`Basis` represents a basis of functions, while the class :class:`MultivariateBasis` represents a multivariate basis of functions. Currently, the available bases are: Fourier basis, B-spline basis, Legendre basis and Wiener basis. 
The user may also define custom bases. 91 | 92 | .. autosummary:: 93 | :toctree: autosummary 94 | 95 | FDApy.representation.Basis 96 | FDApy.representation.MultivariateBasis 97 | 98 | 99 | Iterators 100 | ========= 101 | 102 | The package provides several iterators to handle functional data objects. These iterators allow for the iteration over the functional data objects (e.g. `for` loops, list comprehensions, etc.). These are defined in the classes: :class:`DenseFunctionalDataIterator`, :class:`IrregularFunctionalDataIterator`, and :class:`BasisFunctionalDataIterator`. 103 | -------------------------------------------------------------------------------- /docs/modules/simulation.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Simulation 3 | ========== 4 | 5 | The package contains a full simulation toolbox to generate functional data. This toolbox can be used for the implementation, the test and the comparison of new methodologies. 6 | 7 | 8 | Simulation 9 | ========== 10 | 11 | The simulation is based on basis decomposition and allows to configure parameters for simulating different clusters within the data. The package provides three classes to simulate functional data: :class:`Simulation`, :class:`Brownian`, and :class:`KarhunenLoeve`. The :class:`Simulation` class is the abstract class to simulate functional data. New simulation classes can be added by extending this abstract class. The :class:`Brownian` class simulates functional data with different Brownian paths. The :class:`KarhunenLoeve` class simulates functional data using the Karhunen-Loève expansion. 12 | 13 | .. autosummary:: 14 | :toctree: autosummary 15 | 16 | FDApy.simulation.Simulation 17 | FDApy.simulation.Brownian 18 | FDApy.simulation.KarhunenLoeve 19 | 20 | 21 | Datasets 22 | ======== 23 | 24 | The :class:`Datasets` class provides a set of functions to simulate functional datasets that have already been used in the literature. 
25 | 26 | .. autosummary:: 27 | :toctree: autosummary 28 | 29 | FDApy.simulation.Datasets 30 | -------------------------------------------------------------------------------- /docs/refs.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/docs/refs.bib -------------------------------------------------------------------------------- /docs/sg_execution_times.rst: -------------------------------------------------------------------------------- 1 | 2 | :orphan: 3 | 4 | .. _sphx_glr_sg_execution_times: 5 | 6 | 7 | Computation times 8 | ================= 9 | **00:00.135** total execution time for 28 files **from all galleries**: 10 | 11 | .. container:: 12 | 13 | .. raw:: html 14 | 15 | 19 | 20 | 21 | 22 | 27 | 28 | .. list-table:: 29 | :header-rows: 1 30 | :class: table table-striped sg-datatable 31 | 32 | * - Example 33 | - Time 34 | - Mem (MB) 35 | * - :ref:`sphx_glr_auto_examples_representation_plot_basis_functional.py` (``../examples/representation/plot_basis_functional.py``) 36 | - 00:00.135 37 | - 0.0 38 | * - :ref:`sphx_glr_auto_examples_basis_plot_basis_1d.py` (``../examples/basis/plot_basis_1d.py``) 39 | - 00:00.000 40 | - 0.0 41 | * - :ref:`sphx_glr_auto_examples_basis_plot_basis_2d.py` (``../examples/basis/plot_basis_2d.py``) 42 | - 00:00.000 43 | - 0.0 44 | * - :ref:`sphx_glr_auto_examples_basis_plot_basis_multivariate_1d.py` (``../examples/basis/plot_basis_multivariate_1d.py``) 45 | - 00:00.000 46 | - 0.0 47 | * - :ref:`sphx_glr_auto_examples_data_analysis_plot_canadian_weather.py` (``../examples/data_analysis/plot_canadian_weather.py``) 48 | - 00:00.000 49 | - 0.0 50 | * - :ref:`sphx_glr_auto_examples_data_analysis_plot_cd4.py` (``../examples/data_analysis/plot_cd4.py``) 51 | - 00:00.000 52 | - 0.0 53 | * - :ref:`sphx_glr_auto_examples_fpca_plot_fpca_1d.py` (``../examples/fpca/plot_fpca_1d.py``) 54 | - 00:00.000 55 | - 0.0 56 | 
* - :ref:`sphx_glr_auto_examples_fpca_plot_fpca_1d_sparse.py` (``../examples/fpca/plot_fpca_1d_sparse.py``) 57 | - 00:00.000 58 | - 0.0 59 | * - :ref:`sphx_glr_auto_examples_fpca_plot_fpca_2d.py` (``../examples/fpca/plot_fpca_2d.py``) 60 | - 00:00.000 61 | - 0.0 62 | * - :ref:`sphx_glr_auto_examples_fpca_plot_mfpca_1d.py` (``../examples/fpca/plot_mfpca_1d.py``) 63 | - 00:00.000 64 | - 0.0 65 | * - :ref:`sphx_glr_auto_examples_fpca_plot_mfpca_1d_2d.py` (``../examples/fpca/plot_mfpca_1d_2d.py``) 66 | - 00:00.000 67 | - 0.0 68 | * - :ref:`sphx_glr_auto_examples_fpca_plot_mfpca_1d_sparse.py` (``../examples/fpca/plot_mfpca_1d_sparse.py``) 69 | - 00:00.000 70 | - 0.0 71 | * - :ref:`sphx_glr_auto_examples_fpca_plot_mfpca_2d.py` (``../examples/fpca/plot_mfpca_2d.py``) 72 | - 00:00.000 73 | - 0.0 74 | * - :ref:`sphx_glr_auto_examples_misc_plot_local_polynomials_1d.py` (``../examples/misc/plot_local_polynomials_1d.py``) 75 | - 00:00.000 76 | - 0.0 77 | * - :ref:`sphx_glr_auto_examples_misc_plot_local_polynomials_2d.py` (``../examples/misc/plot_local_polynomials_2d.py``) 78 | - 00:00.000 79 | - 0.0 80 | * - :ref:`sphx_glr_auto_examples_misc_plot_psplines_1d.py` (``../examples/misc/plot_psplines_1d.py``) 81 | - 00:00.000 82 | - 0.0 83 | * - :ref:`sphx_glr_auto_examples_misc_plot_psplines_2d.py` (``../examples/misc/plot_psplines_2d.py``) 84 | - 00:00.000 85 | - 0.0 86 | * - :ref:`sphx_glr_auto_examples_representation_plot_dense_functional.py` (``../examples/representation/plot_dense_functional.py``) 87 | - 00:00.000 88 | - 0.0 89 | * - :ref:`sphx_glr_auto_examples_representation_plot_irregular_functional.py` (``../examples/representation/plot_irregular_functional.py``) 90 | - 00:00.000 91 | - 0.0 92 | * - :ref:`sphx_glr_auto_examples_representation_plot_multivariate_functional.py` (``../examples/representation/plot_multivariate_functional.py``) 93 | - 00:00.000 94 | - 0.0 95 | * - :ref:`sphx_glr_auto_examples_simulation_plot_brownian.py` 
(``../examples/simulation/plot_brownian.py``) 96 | - 00:00.000 97 | - 0.0 98 | * - :ref:`sphx_glr_auto_examples_simulation_plot_cluster.py` (``../examples/simulation/plot_cluster.py``) 99 | - 00:00.000 100 | - 0.0 101 | * - :ref:`sphx_glr_auto_examples_simulation_plot_cluster_multivariate.py` (``../examples/simulation/plot_cluster_multivariate.py``) 102 | - 00:00.000 103 | - 0.0 104 | * - :ref:`sphx_glr_auto_examples_simulation_plot_karhunen.py` (``../examples/simulation/plot_karhunen.py``) 105 | - 00:00.000 106 | - 0.0 107 | * - :ref:`sphx_glr_auto_examples_simulation_plot_karhunen_multivariate.py` (``../examples/simulation/plot_karhunen_multivariate.py``) 108 | - 00:00.000 109 | - 0.0 110 | * - :ref:`sphx_glr_auto_examples_simulation_plot_simulation.py` (``../examples/simulation/plot_simulation.py``) 111 | - 00:00.000 112 | - 0.0 113 | * - :ref:`sphx_glr_auto_examples_smoothing_plot_smooth_data_1d.py` (``../examples/smoothing/plot_smooth_data_1d.py``) 114 | - 00:00.000 115 | - 0.0 116 | * - :ref:`sphx_glr_auto_examples_smoothing_plot_smooth_data_2d.py` (``../examples/smoothing/plot_smooth_data_2d.py``) 117 | - 00:00.000 118 | - 0.0 119 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | Examples are included in the documentation to illustrate the usage of the package. The examples are divided into different categories, each corresponding to a specific functionality of the package. 7 | 8 | .. toctree:: 9 | :includehidden: 10 | :maxdepth: 2 11 | :titlesonly: 12 | -------------------------------------------------------------------------------- /examples/basis/README.rst: -------------------------------------------------------------------------------- 1 | .. _basis_examples: 2 | 3 | Basis 4 | ----- 5 | 6 | These examples illustrate the use of the :mod:`FDApy.basis` module. 
It contains examples of the different basis functions available in the package. -------------------------------------------------------------------------------- /examples/basis/plot_basis_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | One-dimensional Basis 3 | ===================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import Basis 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # The package includes different basis functions to represent functional data. In this section, we are showing the building blocks of the representation of basis functions. To define a :class:`~FDApy.representation.Basis` object, we need to specify the name of the basis, the number of functions in the basis and the sampling points. The sampling points are defined as a :class:`~FDApy.representation.DenseArgvals`. 19 | 20 | 21 | ############################################################################### 22 | # We will show the basis functions for the Fourier, B-splines and Wiener basis. The number of functions in the basis is set to :math:`5` and the sampling points are defined as a :class:`~FDApy.representation.DenseArgvals` object with a hundred points between :math:`0` and :math:`1`. 23 | 24 | # Parameters 25 | n_functions = 5 26 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 27 | 28 | 29 | ############################################################################### 30 | # Fourier basis 31 | # ------------- 32 | # First, we will show the basis functions for the Fourier basis. The basis functions consist of the sine and cosine functions with a frequency that increases with the number of the function. Note that the first function is a constant function. 
This basis may be used to represent periodic functions. 33 | 34 | basis = Basis(name="fourier", n_functions=n_functions, argvals=argvals) 35 | 36 | _ = plot(basis) 37 | 38 | ############################################################################### 39 | # B-splines basis 40 | # --------------- 41 | # Second, we will show the basis functions for the B-splines basis. The basis functions are piecewise polynomials that are smooth at the knots. The number of knots is equal to the number of functions in the basis minus :math:`2`. This basis may be used to represent smooth functions. 42 | 43 | basis = Basis(name="bsplines", n_functions=n_functions, argvals=argvals) 44 | 45 | _ = plot(basis) 46 | 47 | ############################################################################### 48 | # Wiener basis 49 | # ------------ 50 | # Third, we will show the basis functions for the Wiener basis. The basis functions are the eigenfunctions of a Brownian process. This basis may be used to represent rough functions. 51 | 52 | basis = Basis(name="wiener", n_functions=n_functions, argvals=argvals) 53 | 54 | _ = plot(basis) 55 | -------------------------------------------------------------------------------- /examples/basis/plot_basis_2d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Two-dimensional Basis 3 | ===================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import Basis 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # It is possible to define multi-dimensional basis functions using the :class:`~FDApy.representation.Basis` object. Multidimensional basis functions are defined as a tensor product of unidimensional basis. 
To create a multidimensional basis functions, we need a tuple of names and a tuple of number of functions. The sampling points are defined as a :class:`~FDApy.representation.DenseArgvals` object where each entry corresponds to the sampling points of one input dimension. 19 | 20 | # Parameters 21 | name = ("fourier", "fourier") 22 | n_functions = (5, 5) 23 | argvals = DenseArgvals( 24 | {"input_dim_0": np.linspace(0, 1, 101), "input_dim_1": np.linspace(0, 1, 51)} 25 | ) 26 | 27 | ############################################################################### 28 | # Here, we show the basis functions for the tensor product of two Fourier basis. The basis functions consist of the tensor product of sine and cosine functions with a frequency that increases with the number of the function. 29 | basis = Basis(name=name, n_functions=n_functions, argvals=argvals) 30 | 31 | _ = plot(basis) 32 | -------------------------------------------------------------------------------- /examples/basis/plot_basis_multivariate_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multivariate Basis 3 | ================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import MultivariateBasis 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.visualization import plot_multivariate 16 | 17 | ############################################################################### 18 | # Similarly to the multivariate functional, it is possible to define multivariate basis functions using the :class:`~FDApy.representation.MultivariateBasis` object. Multivariate basis functions are defined as a list of univariate basis functions and are represented with a :class:`~FDApy.representation.MultivariateBasis` object. The univariate basis functions can be of whatever dimension (curves, surfaces, ...). 
There is no restriction on the number of elements in the list but each univariate element must have the same number of functions. 19 | 20 | ############################################################################## 21 | # First example 22 | # ------------- 23 | # First, we will define a multivariate basis where the first component is the Fourier basis and the second component is the Legendre basis. The number of functions in the basis is set to :math:`3` and the sampling points are defined as a :class:`~FDApy.representation.DenseArgvals` object with eleven points between :math:`0` and :math:`1` for the first component and with eleven points between :math:`0` and :math:`0.5` for the second component. 24 | 25 | basis_name = ["fourier", "legendre"] 26 | argvals = [ 27 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 11)}), 28 | DenseArgvals({"input_dim_0": np.linspace(0, 0.5, 11)}), 29 | ] 30 | n_functions = [3, 3] 31 | 32 | basis = MultivariateBasis(name=basis_name, n_functions=n_functions, argvals=argvals) 33 | 34 | _ = plot_multivariate(basis) 35 | 36 | 37 | ############################################################################### 38 | # Second example 39 | # -------------- 40 | # The second example is a multivariate basis where the first component is a multidimensional basis built from the tensor product of the Fourier basis and the second component is a multidimensional basis built from the tensor product of the Legendre basis. The number of functions in the multivariate basis is set to :math:`3`. 
41 | 42 | # Parameters 43 | basis_name = [("fourier", "fourier"), ("legendre", "legendre")] 44 | argvals = [ 45 | DenseArgvals( 46 | {"input_dim_0": np.linspace(0, 1, 11), "input_dim_1": np.linspace(0, 0.5, 11)} 47 | ), 48 | DenseArgvals( 49 | {"input_dim_0": np.linspace(0, 0.5, 11), "input_dim_1": np.linspace(0, 1, 11)} 50 | ), 51 | ] 52 | n_functions = [(3, 3), (3, 3)] 53 | 54 | basis = MultivariateBasis(name=basis_name, n_functions=n_functions, argvals=argvals) 55 | 56 | _ = plot_multivariate(basis) 57 | 58 | ############################################################################### 59 | # Third example 60 | # ------------- 61 | # The third example is a multivariate basis where the first component is a Fourier basis (unidimensional) and the second component is a tensor product of Legendre basis (multidimensional). The number of functions in the multivariate basis is set to :math:`9`. To be coherent with the number of functions in the Fourier basis, the number of functions in the Legendre basis is set to :math:`3` for each dimension (it results in :math:`3 \times 3 = 9` multidimensional functions). 62 | 63 | # Parameters 64 | basis_name = ["fourier", ("legendre", "legendre")] 65 | argvals = [ 66 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 11)}), 67 | DenseArgvals( 68 | {"input_dim_0": np.linspace(0, 0.5, 11), "input_dim_1": np.linspace(0, 1, 11)} 69 | ), 70 | ] 71 | n_functions = [9, (3, 3)] 72 | 73 | basis = MultivariateBasis(name=basis_name, n_functions=n_functions, argvals=argvals) 74 | 75 | _ = plot_multivariate(basis) 76 | -------------------------------------------------------------------------------- /examples/data/canadian_precipitation_monthly.csv: -------------------------------------------------------------------------------- 1 | "","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" 2 | "St. 
Johns",4.65161290322581,4.73571428571429,4.23548387096774,3.61666666666667,3.25161290322581,3.27,2.65161290322581,3.82258064516129,4.12666666666667,4.90967741935484,4.78333333333333,4.68064516129032 3 | "Halifax",4.63225806451613,4.14642857142857,4.13548387096774,4.01,3.52903225806452,3.12666666666667,3.02258064516129,3.40322580645161,3.20333333333333,4.12258064516129,5.14,5.36451612903226 4 | "Sydney",4.76451612903226,4.44642857142857,4.42258064516129,4.07666666666667,3.18064516129032,2.97666666666667,2.75483870967742,3.11612903225806,3.50666666666667,4.55161290322581,5.19333333333333,5.50967741935484 5 | "Yarmouth",4.02258064516129,3.63928571428571,3.30967741935484,3.37333333333333,3.09354838709677,3.04666666666667,2.63870967741935,2.78064516129032,2.95333333333333,3.50967741935484,4.51666666666667,4.64193548387097 6 | "Charlottvl",3.42903225806452,3.23214285714286,3.02903225806452,2.99333333333333,3.10645161290323,3.02,2.65806451612903,2.99354838709677,3.09,3.66129032258065,4.03,4.25806451612903 7 | "Fredericton",3.06774193548387,2.88214285714286,3.00645161290323,2.78666666666667,3.02903225806452,2.95,2.68709677419355,3.14516129032258,3.02333333333333,3.13870967741935,3.61333333333333,3.73225806451613 8 | "Scheffervll",1.79032258064516,1.26785714285714,1.39032258064516,1.67,1.60967741935484,2.39,3.20322580645161,2.88387096774194,3.11333333333333,3.03870967741935,2.26666666666667,1.64838709677419 9 | "Arvida",1.9,1.74285714285714,1.58387096774194,1.47,2.4741935483871,2.92,3.61612903225806,3,3.29,2.30322580645161,2.45666666666667,2.66129032258065 10 | "Bagottville",1.96129032258065,1.71785714285714,1.71612903225806,1.78,2.57741935483871,3.02666666666667,3.76129032258065,3.26451612903226,3.25666666666667,2.56451612903226,2.53,2.39677419354839 11 | "Quebec",2.93225806451613,2.53928571428571,2.71290322580645,2.60333333333333,3.36451612903226,3.69666666666667,3.89677419354839,3.8258064516129,4.07333333333333,3.16774193548387,3.48333333333333,3.38387096774194 12 | 
"Sherbrooke",2.37096774193548,2.16428571428571,2.45806451612903,2.58333333333333,2.99032258064516,3.42333333333333,3.72903225806452,4.15806451612903,3.36333333333333,2.92903225806452,3.24,2.99032258064516 13 | "Montreal",2.15483870967742,2.00714285714286,2.18387096774194,2.53666666666667,2.18064516129032,2.79,2.79354838709677,3.17741935483871,2.88666666666667,2.45161290322581,3.08666666666667,2.65806451612903 14 | "Ottawa",1.92903225806452,2.07857142857143,2.15483870967742,2.43333333333333,2.45806451612903,2.6,2.91612903225806,2.94193548387097,2.71333333333333,2.36451612903226,2.84666666666667,2.54838709677419 15 | "Toronto",1.49032258064516,1.55714285714286,1.79354838709677,2.31333333333333,2.14193548387097,2.26666666666667,2.56129032258064,2.73225806451613,2.43,1.99677419354839,2.37333333333333,2.04193548387097 16 | "London",2.28064516129032,2.10357142857143,2.38709677419355,2.69333333333333,2.40645161290323,2.76333333333333,2.64193548387097,2.83548387096774,2.88666666666667,2.46451612903226,3.03666666666667,2.9741935483871 17 | "Thunderbay",1.00967741935484,0.860714285714286,1.27741935483871,1.64,2.24838709677419,2.74666666666667,2.70322580645161,2.79354838709677,2.86666666666667,1.96451612903226,1.69333333333333,1.28709677419355 18 | "Winnipeg",0.596774193548387,0.5,0.712903225806452,1.20666666666667,1.88387096774194,2.80333333333333,2.46129032258065,2.47741935483871,1.73,0.958064516129032,0.74,0.612903225806452 19 | "The Pas",0.512903225806452,0.510714285714286,0.65483870967742,0.893333333333333,1.07096774193548,2.11666666666667,2.30967741935484,2.03548387096774,1.9,1.24838709677419,0.846666666666667,0.62258064516129 20 | "Churchill",0.529032258064516,0.435714285714286,0.583870967741935,0.69,0.967741935483871,1.44,1.68709677419355,1.94516129032258,1.87,1.47741935483871,1.16666666666667,0.603225806451613 21 | 
"Regina",0.451612903225806,0.435714285714286,0.545161290322581,0.683333333333333,1.54193548387097,2.38666666666667,2.01612903225806,1.3741935483871,1.14,0.664516129032258,0.396666666666667,0.525806451612903 22 | "Pr. Albert",0.487096774193548,0.482142857142857,0.541935483870968,0.77,1.39354838709677,2.31333333333333,2.35161290322581,1.7741935483871,1.30333333333333,0.735483870967742,0.56,0.609677419354839 23 | "Uranium Cty",0.67741935483871,0.521428571428571,0.609677419354839,0.636666666666667,0.693548387096774,1.26333333333333,1.70967741935484,1.72258064516129,1.26666666666667,1.18709677419355,0.93,0.664516129032258 24 | "Edmonton",0.761290322580645,0.560714285714286,0.503225806451613,0.766666666666667,1.43870967741935,2.57666666666667,3.1,2.2258064516129,1.54333333333333,0.616129032258064,0.543333333333333,0.6 25 | "Calgary",0.364516129032258,0.342857142857143,0.461290322580645,0.79,1.75483870967742,2.72666666666667,2.20967741935484,1.65806451612903,1.51666666666667,0.5,0.413333333333333,0.390322580645161 26 | "Kamloops",0.819354838709677,0.485714285714286,0.316129032258065,0.5,0.725806451612903,1.01,0.916129032258065,0.974193548387097,0.886666666666667,0.493548387096774,0.77,1.02258064516129 27 | "Vancouver",4.8741935483871,4.21071428571429,3.44516129032258,2.63333333333333,1.96129032258065,1.62666666666667,1.16451612903226,1.29677419354839,2.01666666666667,3.58709677419355,5.62,5.61612903225806 28 | "Victoria",4.49354838709677,3.46428571428571,2.31290322580645,1.49333333333333,1.09354838709677,0.923333333333333,0.629032258064516,0.783870967741936,1.14,2.36451612903226,4.54333333333333,4.8258064516129 29 | "Pr. George",1.77741935483871,1.21428571428571,1.03548387096774,0.993333333333333,1.60967741935484,2.24666666666667,1.9,1.83870967741935,1.96333333333333,1.9258064516129,1.77,1.70967741935484 30 | "Pr. 
Rupert",8.22903225806452,7.675,6.00322580645161,5.92666666666667,4.65483870967742,3.97333333333333,3.61290322580645,5.18064516129032,8.45333333333333,12.2161290322581,10.19,9.15483870967742 31 | "Whitehorse",0.564516129032258,0.435714285714286,0.374193548387097,0.263333333333333,0.438709677419355,1.06666666666667,1.26129032258065,1.24838709677419,1.21333333333333,0.75483870967742,0.67,0.619354838709677 32 | "Dawson",0.625806451612903,0.564285714285714,0.374193548387097,0.353333333333333,0.770967741935484,1.33666666666667,1.51290322580645,1.45483870967742,1.11333333333333,1.03225806451613,0.873333333333333,0.732258064516129 33 | "Yellowknife",0.490322580645161,0.446428571428571,0.380645161290323,0.34,0.570967741935484,0.806666666666667,1.13225806451613,1.2741935483871,0.97,1.10645161290323,0.796666666666667,0.470967741935484 34 | "Iqaluit",0.690322580645161,0.635714285714286,0.67741935483871,0.886666666666667,0.919354838709677,1.20666666666667,1.85161290322581,2.01935483870968,1.77666666666667,1.29354838709677,0.996666666666667,0.635483870967742 35 | "Inuvik",0.516129032258065,0.417857142857143,0.367741935483871,0.436666666666667,0.606451612903226,0.736666666666667,1.06129032258065,1.43548387096774,0.866666666666667,0.94516129032258,0.59,0.535483870967742 36 | "Resolute",0.119354838709677,0.107142857142857,0.193548387096774,0.21,0.32258064516129,0.413333333333333,0.716129032258065,1.01612903225806,0.753333333333333,0.451612903225806,0.233333333333333,0.170967741935484 37 | -------------------------------------------------------------------------------- /examples/data_analysis/README.rst: -------------------------------------------------------------------------------- 1 | .. _real_data_analysis: 2 | 3 | Real data analysis 4 | ------------------ 5 | 6 | These examples illustrate the use of the package using real datasets. 
7 | -------------------------------------------------------------------------------- /examples/data_analysis/plot_canadian_weather.py: -------------------------------------------------------------------------------- 1 | """ 2 | Canadian weather dataset 3 | ========================= 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.preprocessing import UFPCA 16 | 17 | from FDApy import read_csv 18 | from FDApy.visualization import plot 19 | 20 | ############################################################################## 21 | # In this section, we will use the Canadian weather dataset to illustrate the use of the package. We will first load the data and plot it. Then, we will smooth the data using local polynomial regression. Finally, we will perform UFPCA on the smoothed data and plot the eigenfunctions. 22 | 23 | ############################################################################## 24 | # First, we load the data. The dataset contains daily temperature data for :math:`35` Canadian cities. The dataset can be downloaded from the `here `_. This is an example of a :class:`~FDApy.representation.DenseFunctionalData` object. It also shows how the CSV file should be formatted and can be read using the :func:`~FDApy.read_csv` function. 25 | 26 | # Load data 27 | temp_data = read_csv("../data/canadian_temperature_daily.csv", index_col=0) 28 | 29 | _ = plot(temp_data) 30 | plt.show() 31 | 32 | 33 | ############################################################################### 34 | # We will now smooth the data using local polynomial regression on the grid :math:`\{1, 2, 3, \dots, 365\}`. We will use the Epanechnikov kernel with a bandwidth of :math:`30` and a degree of :math:`1`. The smoothing is performed using the :func:`~FDApy.representation.DenseFunctionalData.smooth` method. 
We will then plot the smoothed data. 35 | 36 | points = DenseArgvals({"input_dim_0": np.linspace(1, 365, 365)}) 37 | kernel_name = "epanechnikov" 38 | bandwidth = 30 39 | degree = 1 40 | 41 | temp_smooth = temp_data.smooth( 42 | points=points, 43 | method="LP", 44 | kernel_name=kernel_name, 45 | bandwidth=bandwidth, 46 | degree=degree, 47 | ) 48 | 49 | fig, axes = plt.subplots(2, 2, figsize=(10, 8)) 50 | for idx, ax in enumerate(axes.flat): 51 | plot(temp_data[idx], colors="k", alpha=0.2, ax=ax) 52 | plot(temp_smooth[idx], colors="r", ax=ax) 53 | ax.set_title(f"Observation {idx + 1}") 54 | plt.show() 55 | 56 | 57 | ############################################################################### 58 | # We will now perform UFPCA on the smoothed data. We will use the inner product method and keep the principal components that explain 99% of the variance. We will then plot the eigenfunctions. The scores are then computed using the inner-product matrix. 59 | 60 | ufpca = UFPCA(n_components=0.99, method="inner-product") 61 | ufpca.fit(temp_smooth) 62 | scores = ufpca.transform(method="InnPro") 63 | 64 | _ = plot(ufpca.eigenfunctions) 65 | plt.show() 66 | 67 | 68 | ############################################################################### 69 | # Finally, the data can be reconstructed using the scores. We plot the reconstruction of the first 10 observations. 
70 | data_recons = ufpca.inverse_transform(scores) 71 | 72 | fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16)) 73 | for idx_plot, idx in enumerate(np.random.choice(temp_data.n_obs, 10)): 74 | temp_ax = axes.flatten()[idx_plot] 75 | temp_ax = plot(temp_data[idx], colors="k", alpha=0.2, ax=temp_ax, label="Data") 76 | plot(temp_smooth[idx], colors="r", ax=temp_ax, label="Smooth") 77 | plot(data_recons[idx], colors="b", ax=temp_ax, label="Reconstruction") 78 | temp_ax.legend() 79 | plt.show() 80 | -------------------------------------------------------------------------------- /examples/data_analysis/plot_cd4.py: -------------------------------------------------------------------------------- 1 | """ 2 | CD4 dataset 3 | ============ 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy import read_csv 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################## 18 | # In this section, we will use the CD4 dataset to illustrate the use of the package concerning irregularly sampled functional data. The dataset contains CD4 counts for :math:`366` patients. The dataset can be downloaded from the `here `_. This is an example of a :class:`~FDApy.representation.IrregularFunctionalData` object. It also shows how the CSV file should be formatted and can be read using the :func:`~FDApy.read_csv` function. 19 | 20 | # Load data 21 | cd4_data = read_csv("../data/cd4.csv", index_col=0) 22 | 23 | _ = plot(cd4_data) 24 | plt.show() 25 | -------------------------------------------------------------------------------- /examples/fpca/README.rst: -------------------------------------------------------------------------------- 1 | .. 
_simulation_fpca: 2 | 3 | Dimension Reduction 4 | ------------------- 5 | 6 | These examples illustrate the use of the :mod:`FDApy.preprocessing.dim_reduction` module. It contains various functionalities to reduce the dimension of functional data. 7 | -------------------------------------------------------------------------------- /examples/fpca/plot_fpca_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | FPCA of 1-dimensional data 3 | =========================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.simulation import KarhunenLoeve 16 | from FDApy.preprocessing import UFPCA 17 | from FDApy.visualization import plot 18 | 19 | ############################################################################### 20 | # In this section, we are showing how to perform a functional principal component on one-dimensional data using the :class:`~FDApy.preprocessing.UFPCA` class. We will compare two methods to perform the dimension reduction: the decomposition of the covariance operator and the decomposition of the inner-product matrix. We will use the first :math:`K = 5` principal components to reconstruct the curves. 21 | 22 | 23 | # Set general parameters 24 | rng = 42 25 | n_obs = 50 26 | 27 | # Parameters of the basis 28 | name = "fourier" 29 | n_functions = 25 30 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 31 | 32 | 33 | ############################################################################### 34 | # We simulate :math:`N = 50` curves on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 25` Fourier basis functions on :math:`[0, 1]` and the variance of the scores random variables decreasing exponentially. 
35 | 36 | kl = KarhunenLoeve( 37 | n_functions=n_functions, basis_name=name, argvals=argvals, random_state=rng 38 | ) 39 | kl.new(n_obs=n_obs, clusters_std="exponential") 40 | kl.add_noise(noise_variance=0.05) 41 | data = kl.noisy_data 42 | 43 | _ = plot(data) 44 | plt.show() 45 | 46 | ############################################################################### 47 | # Estimation of the eigencomponents 48 | # --------------------------------- 49 | # 50 | # The :class:`~FDApy.preprocessing.UFPCA` class requires two parameters: the number of components to estimate and the method to use. The method parameter can be either `covariance` or `inner-product`. The first method estimates the eigenfunctions by decomposing the covariance operator, while the second method estimates the eigenfunctions by decomposing the inner-product matrix. 51 | 52 | # First, we perform a univariate FPCA using a decomposition of the covariance operator. 53 | ufpca_cov = UFPCA(n_components=5, method="covariance") 54 | ufpca_cov.fit(data) 55 | 56 | # Plot the eigenfunctions using the decomposition of the covariance operator. 57 | _ = plot(ufpca_cov.eigenfunctions) 58 | plt.show() 59 | 60 | ############################################################################### 61 | # 62 | 63 | # Second, we perform a univariate FPCA using a decomposition of the inner-product matrix. 64 | ufpca_innpro = UFPCA(n_components=5, method="inner-product") 65 | ufpca_innpro.fit(data) 66 | 67 | # Plot the eigenfunctions using the decomposition of the inner-product matrix. 68 | _ = plot(ufpca_innpro.eigenfunctions) 69 | plt.show() 70 | 71 | ############################################################################### 72 | # Estimation of the scores 73 | # ------------------------ 74 | # 75 | # Once the eigenfunctions are estimated, we can compute the scores using numerical integration, the PACE algorithm or the eigenvectors from the decomposition of the inner-product matrix. 
The :func:`~FDApy.preprocessing.UFPCA.transform` method requires the data as argument and the method to use. The method parameter can be either `NumInt`, `PACE` or `InnPro`. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data can not be passed as argument of the :func:`~FDApy.preprocessing.UFPCA.transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 76 | 77 | scores_numint = ufpca_cov.transform(data, method="NumInt") 78 | scores_pace = ufpca_cov.transform(data, method="PACE") 79 | scores_innpro = ufpca_innpro.transform(method="InnPro") 80 | 81 | # Plot of the scores 82 | plt.scatter(scores_numint[:, 0], scores_numint[:, 1], label="NumInt") 83 | plt.scatter(scores_pace[:, 0], scores_pace[:, 1], label="PACE") 84 | plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label="InnPro") 85 | plt.legend() 86 | plt.show() 87 | 88 | 89 | ############################################################################### 90 | # Comparison of the methods 91 | # ------------------------- 92 | # 93 | # Finally, we compare the methods by reconstructing the curves using the first :math:`K = 5` principal components. We plot a sample of curves and their reconstruction. 
94 | 95 | data_recons_numint = ufpca_cov.inverse_transform(scores_numint) 96 | data_recons_pace = ufpca_cov.inverse_transform(scores_pace) 97 | data_recons_innpro = ufpca_innpro.inverse_transform(scores_innpro) 98 | 99 | colors_numint = np.array([[0.9, 0, 0, 1]]) 100 | colors_pace = np.array([[0, 0.9, 0, 1]]) 101 | colors_innpro = np.array([[0.9, 0, 0.9, 1]]) 102 | 103 | fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16)) 104 | for idx_plot, idx in enumerate(np.random.choice(n_obs, 10)): 105 | temp_ax = axes.flatten()[idx_plot] 106 | temp_ax = plot(kl.data[idx], ax=temp_ax, label="True") 107 | plot( 108 | data_recons_numint[idx], 109 | colors=colors_numint, 110 | ax=temp_ax, 111 | label="Reconstruction NumInt", 112 | ) 113 | plot( 114 | data_recons_pace[idx], 115 | colors=colors_pace, 116 | ax=temp_ax, 117 | label="Reconstruction PACE", 118 | ) 119 | plot( 120 | data_recons_innpro[idx], 121 | colors=colors_innpro, 122 | ax=temp_ax, 123 | label="Reconstruction InnPro", 124 | ) 125 | temp_ax.legend() 126 | plt.show() 127 | -------------------------------------------------------------------------------- /examples/fpca/plot_fpca_1d_sparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | FPCA of 1-dimensional sparse data 3 | ================================== 4 | 5 | """ 6 | 7 | ############################################################################### 8 | # 9 | 10 | # Author: Steven Golovkine 11 | # License: MIT 12 | 13 | # Load packages 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | 17 | from FDApy.representation import DenseArgvals 18 | from FDApy.simulation import KarhunenLoeve 19 | from FDApy.preprocessing import UFPCA 20 | from FDApy.visualization import plot 21 | 22 | ############################################################################### 23 | # In this section, we are showing how to perform a functional principal component on one-dimensional sparse data using the 
:class:`~FDApy.preprocessing.UFPCA` class. We will compare two methods to perform the dimension reduction: the decomposition of the covariance operator and the decomposition of the inner-product matrix. We will use the first :math:`K = 10` principal components to reconstruct the curves. 24 | 25 | 26 | # Set general parameters 27 | rng = 42 28 | n_obs = 50 29 | 30 | # Parameters of the basis 31 | name = "fourier" 32 | n_functions = 25 33 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 34 | 35 | 36 | ############################################################################### 37 | # We simulate :math:`N = 50` curves on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 25` Fourier basis functions on :math:`[0, 1]` and the variance of the scores random variables decreasing exponentially. We add noise and sparsify the data. 38 | 39 | kl = KarhunenLoeve( 40 | n_functions=n_functions, basis_name=name, argvals=argvals, random_state=rng 41 | ) 42 | kl.new(n_obs=n_obs, clusters_std="exponential") 43 | kl.add_noise_and_sparsify(noise_variance=0.01, percentage=0.5, epsilon=0.05) 44 | data = kl.sparse_data 45 | 46 | _ = plot(data) 47 | 48 | ############################################################################### 49 | # Estimation of the eigencomponents 50 | # --------------------------------- 51 | # 52 | # The :class:`~FDApy.preprocessing.UFPCA` class requires two parameters: the number of components to estimate and the method to use. The method parameter can be either `covariance` or `inner-product`. The first method estimates the eigenfunctions by decomposing the covariance operator, while the second method estimates the eigenfunctions by decomposing the inner-product matrix. 53 | 54 | # First, we perform a univariate FPCA using a decomposition of the covariance operator. 
55 | ufpca_cov = UFPCA(n_components=10, method="covariance") 56 | ufpca_cov.fit(data, method_smoothing="PS") 57 | 58 | # Plot the eigenfunctions 59 | _ = plot(ufpca_cov.eigenfunctions) 60 | plt.show() 61 | 62 | 63 | ############################################################################### 64 | # 65 | 66 | # Second, we perform a univariate FPCA using a decomposition of the inner-product matrix. 67 | ufpca_innpro = UFPCA(n_components=10, method="inner-product") 68 | ufpca_innpro.fit(data, method_smoothing="PS") 69 | 70 | # Plot the eigenfunctions 71 | _ = plot(ufpca_innpro.eigenfunctions) 72 | plt.show() 73 | 74 | 75 | ############################################################################### 76 | # Estimation of the scores 77 | # ------------------------ 78 | # 79 | # Once the eigenfunctions are estimated, we can compute the scores using numerical integration, the PACE algorithm or the eigenvectors from the decomposition of the inner-product matrix. The :func:`~FDApy.preprocessing.UFPCA.transform` method requires the data as argument and the method to use. The method parameter can be either `NumInt`, `PACE` or `InnPro`. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data cannot be passed as argument of the `transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 
80 | 81 | scores_numint = ufpca_cov.transform(data, method="NumInt") 82 | scores_pace = ufpca_cov.transform(data, method="PACE") 83 | scores_innpro = ufpca_innpro.transform(method="InnPro") 84 | 85 | # Plot of the scores 86 | plt.scatter(scores_numint[:, 0], scores_numint[:, 1], label="NumInt") 87 | plt.scatter(scores_pace[:, 0], scores_pace[:, 1], label="PACE") 88 | plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label='InnPro') 89 | plt.legend() 90 | plt.show() 91 | 92 | 93 | ############################################################################### 94 | # Comparison of the methods 95 | # ------------------------- 96 | # 97 | # Finally, we compare the methods by reconstructing the curves using the first :math:`K = 10` principal components. We plot a sample of curves and their reconstruction. 98 | 99 | data_recons_numint = ufpca_cov.inverse_transform(scores_numint) 100 | data_recons_pace = ufpca_cov.inverse_transform(scores_pace) 101 | data_recons_innpro = ufpca_innpro.inverse_transform(scores_innpro) 102 | 103 | colors_numint = np.array([[0.9, 0, 0, 1]]) 104 | colors_pace = np.array([[0, 0.9, 0, 1]]) 105 | colors_innpro = np.array([[0.9, 0, 0.9, 1]]) 106 | 107 | fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16)) 108 | for idx_plot, idx in enumerate(np.random.choice(n_obs, 10)): 109 | temp_ax = axes.flatten()[idx_plot] 110 | temp_ax = plot(kl.data[idx], ax=temp_ax, label="True") 111 | plot( 112 | data_recons_numint[idx], 113 | colors=colors_numint, 114 | ax=temp_ax, 115 | label="Reconstruction NumInt", 116 | ) 117 | plot( 118 | data_recons_pace[idx], 119 | colors=colors_pace, 120 | ax=temp_ax, 121 | label="Reconstruction PACE", 122 | ) 123 | plot( 124 | data_recons_innpro[idx], 125 | colors=colors_innpro, 126 | ax=temp_ax, 127 | label="Reconstruction InnPro", 128 | ) 129 | temp_ax.legend() 130 | plt.show() 131 | -------------------------------------------------------------------------------- /examples/fpca/plot_fpca_2d.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | FPCA of 2-dimensional data 3 | =========================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.simulation import KarhunenLoeve 16 | from FDApy.preprocessing import UFPCA, FCPTPA 17 | from FDApy.visualization import plot 18 | 19 | ############################################################################### 20 | # In this section, we are showing how to perform a functional principal component on two-dimensional data using the :class:`~FDApy.preprocessing.UFPCA` and :class:`~FDApy.preprocessing.FCPTPA` classes. We will compare two methods to perform the dimension reduction: the FCP-TPA and the decomposition of the inner-product matrix. We will use the first :math:`K = 5` principal components to reconstruct the curves. 21 | 22 | 23 | # Set general parameters 24 | rng = 42 25 | n_obs = 50 26 | 27 | # Parameters of the basis 28 | name = ("fourier", "fourier") 29 | n_functions = (5, 5) 30 | argvals = DenseArgvals({ 31 | "input_dim_0": np.linspace(0, 1, 21), 32 | "input_dim_1": np.linspace(-0.5, 0.5, 21) 33 | }) 34 | 35 | 36 | ############################################################################### 37 | # We simulate :math:`N = 50` images on the two-dimensional observation grid :math:`\{0, 0.05, 0.1, \cdots, 1\} \times \{0, 0.05, 0.1, \cdots, 1\}`, based on the tensor product of the first :math:`K = 5` Fourier basis functions on :math:`[0, 1] \times [0, 1]` and the variance of the scores random variables decreases exponentially. 
38 | 39 | kl = KarhunenLoeve( 40 | basis_name=name, 41 | n_functions=n_functions, 42 | argvals=argvals, 43 | add_intercept=False, 44 | random_state=rng, 45 | ) 46 | kl.new(n_obs=n_obs, clusters_std="exponential") 47 | data = kl.data 48 | 49 | _ = plot(data) 50 | 51 | 52 | ############################################################################### 53 | # Estimation of the eigencomponents 54 | # --------------------------------- 55 | # 56 | # The class :class:`~FDApy.preprocessing.FCPTPA` requires two parameters: the number of components to estimate and if normalization is needed. The method also requires hyperparameters for the FCP-TPA algorithm. The hyperparameters are the penalty matrices for the first and second dimensions, the range of the alpha parameter for the first and second dimensions, the tolerance for the convergence of the algorithm, the maximum number of iterations, and if the tolerance should be adapted during the iterations. The class :class:`~FDApy.preprocessing.UFPCA` requires two parameters: the number of components to estimate and the method to use. For two-dimensional data, the method parameter can only be `inner-product`. It will estimate the eigenfunctions by decomposing the inner-product matrix. 57 | 58 | 59 | # First, we perform a univariate FPCA using the FCPTPA. 
60 | # Hyperparameters for FCP-TPA 61 | n_points = data.n_points 62 | mat_v = np.diff(np.identity(n_points[0])) 63 | mat_w = np.diff(np.identity(n_points[1])) 64 | 65 | penal_v = np.dot(mat_v, mat_v.T) 66 | penal_w = np.dot(mat_w, mat_w.T) 67 | 68 | ufpca_fcptpa = FCPTPA(n_components=5, normalize=True) 69 | ufpca_fcptpa.fit( 70 | data, 71 | penalty_matrices={"v": penal_v, "w": penal_w}, 72 | alpha_range={"v": (1e-4, 1e4), "w": (1e-4, 1e4)}, 73 | tolerance=1e-4, 74 | max_iteration=15, 75 | adapt_tolerance=True, 76 | ) 77 | 78 | ############################################################################### 79 | # 80 | 81 | # Second, we perform a univariate FPCA using a decomposition of the inner-product matrix. 82 | ufpca_innpro = UFPCA(n_components=5, method="inner-product") 83 | ufpca_innpro.fit(data) 84 | 85 | 86 | ############################################################################### 87 | # Estimation of the scores 88 | # ------------------------ 89 | # 90 | # Once the eigenfunctions are estimated, we can estimate the scores -- projection of the curves onto the eigenfunctions -- using the eigenvectors from the decomposition of the covariance operator or the inner-product matrix. We can then reconstruct the curves using the scores. The :func:`~FDApy.preprocessing.UFPCA.transform` method requires the data as argument and the method to use. The method parameter can be either `NumInt`, `PACE` or `InnPro`. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data can not be passed as argument of the `transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 
91 | 92 | scores_fcptpa = ufpca_fcptpa.transform(data) 93 | scores_innpro = ufpca_innpro.transform(method="InnPro") 94 | 95 | # Plot of the scores 96 | plt.scatter(scores_fcptpa[:, 0], scores_fcptpa[:, 1], label="FCPTPA") 97 | plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label="InnPro") 98 | plt.legend() 99 | plt.show() 100 | 101 | 102 | ############################################################################### 103 | # Comparison of the methods 104 | # ------------------------- 105 | # 106 | # Finally, we compare the methods by reconstructing the curves using the first :math:`K = 5` principal components. We plot a sample of curves and their reconstruction. 107 | 108 | data_recons_fcptpa = ufpca_fcptpa.inverse_transform(scores_fcptpa) 109 | data_recons_innpro = ufpca_innpro.inverse_transform(scores_innpro) 110 | 111 | fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(16, 16)) 112 | for idx_plot, idx in enumerate(np.random.choice(n_obs, 5)): 113 | axes[idx_plot, 0] = plot(data[idx], ax=axes[idx_plot, 0]) 114 | axes[idx_plot, 0].set_title("True") 115 | 116 | axes[idx_plot, 1] = plot(data_recons_fcptpa[idx], ax=axes[idx_plot, 1]) 117 | axes[idx_plot, 1].set_title("FCPTPA") 118 | 119 | axes[idx_plot, 2] = plot(data_recons_innpro[idx], ax=axes[idx_plot, 2]) 120 | axes[idx_plot, 2].set_title("InnPro") 121 | plt.show() 122 | -------------------------------------------------------------------------------- /examples/fpca/plot_mfpca_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | MFPCA of 1-dimensional data 3 | =========================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.simulation import KarhunenLoeve 16 | from FDApy.preprocessing import MFPCA 17 | from FDApy.visualization import plot, plot_multivariate 18 | 19 | 
############################################################################### 20 | # In this section, we are showing how to perform a multivariate functional principal component analysis on one-dimensional data using the :class:`~FDApy.preprocessing.MFPCA` class. We will compare two methods to perform the dimension reduction: the decomposition of the covariance operator and the decomposition of the inner-product matrix. We will use :math:`95\%` of the variance explained in the data to reconstruct the curves. 21 | 22 | # Set general parameters 23 | rng = 42 24 | n_obs = 50 25 | idx = 5 26 | colors = np.array([[0.5, 0, 0, 1]]) 27 | 28 | 29 | # Parameters of the basis 30 | name = ["bsplines", "fourier"] 31 | n_functions = [5, 5] 32 | argvals = [ 33 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}), 34 | DenseArgvals({"input_dim_0": np.linspace(-0.5, 0.5, 101)}), 35 | ] 36 | 37 | ############################################################################### 38 | # We simulate :math:`N = 50` curves of a 2-dimensional process. The first 39 | # component of the process is defined on the one-dimensional observation grid 40 | # :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` 41 | # B-splines basis functions on :math:`[0, 1]` and the variance of the scores 42 | # random variables equal to :math:`1`. The second component of the process is 43 | # defined on the one-dimensional observation grid 44 | # :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` 45 | # Fourier basis functions on :math:`[0, 1]` and the variance of the scores 46 | # random variables equal to :math:`1`. 
47 | kl = KarhunenLoeve( 48 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 49 | ) 50 | kl.new(n_obs=n_obs) 51 | kl.add_noise(noise_variance=0.05) 52 | data = kl.noisy_data 53 | 54 | _ = plot_multivariate(data) 55 | plt.show() 56 | 57 | 58 | ############################################################################### 59 | # Estimation of the eigencomponents 60 | # --------------------------------- 61 | # 62 | # The :class:`~FDApy.preprocessing.MFPCA` class requires two parameters: the number of components to estimate and the method to use. The method parameter can be either `covariance` or `inner-product`. The first method estimates the eigenfunctions by decomposing the covariance operator, while the second method estimates the eigenfunctions by decomposing the inner-product matrix. In the case of a decomposition of the covariance operator, the method also requires the univariate expansions to estimate the eigenfunctions of each component. Here, we use the univariate functional principal component analysis with penalized splines to estimate the eigenfunctions of each component. 63 | 64 | # First, we perform a multivariate FPCA using a decomposition of the covariance operator. 65 | univariate_expansions = [ 66 | {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"}, 67 | {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"}, 68 | ] 69 | 70 | mfpca_cov = MFPCA( 71 | n_components=0.95, method="covariance", univariate_expansions=univariate_expansions 72 | ) 73 | mfpca_cov.fit(data, scores_method="PACE") 74 | 75 | # Plot the eigenfunctions using the decomposition of the covariance operator. 76 | _ = plot_multivariate(mfpca_cov.eigenfunctions) 77 | plt.show() 78 | 79 | 80 | ############################################################################### 81 | # 82 | 83 | # Second, we perform a multivariate FPCA using a decomposition of the inner-product matrix. 
84 | mfpca_innpro = MFPCA(n_components=0.95, method="inner-product") 85 | mfpca_innpro.fit(data) 86 | 87 | # Plot the eigenfunctions using the decomposition of the inner-product matrix. 88 | _ = plot_multivariate(mfpca_innpro.eigenfunctions) 89 | plt.show() 90 | 91 | 92 | ############################################################################### 93 | # Estimation of the scores 94 | # ------------------------ 95 | # 96 | # Once the eigenfunctions are estimated, we can compute the scores using numerical integration or the eigenvectors from the decomposition of the inner-product matrix. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data cannot be passed as argument of the :func:`~FDApy.preprocessing.MFPCA.transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 97 | scores_numint = mfpca_cov.transform(data, method="NumInt") 98 | scores_innpro = mfpca_innpro.transform(method="InnPro") 99 | 100 | # Plot of the scores 101 | plt.scatter(scores_numint[:, 0], scores_numint[:, 1], label="NumInt") 102 | plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label="InnPro") 103 | plt.legend() 104 | plt.show() 105 | 106 | 107 | ############################################################################### 108 | # Comparison of the methods 109 | # ------------------------- 110 | # 111 | # Finally, we compare the methods by reconstructing the curves using :math:`95\%` of the variance explained. 
112 | data_recons_numint = mfpca_cov.inverse_transform(scores_numint) 113 | data_recons_innpro = mfpca_innpro.inverse_transform(scores_innpro) 114 | 115 | ############################################################################### 116 | # 117 | 118 | colors_numint = np.array([[0.9, 0, 0, 1]]) 119 | colors_pace = np.array([[0, 0.9, 0, 1]]) 120 | colors_innpro = np.array([[0.9, 0, 0.9, 1]]) 121 | 122 | 123 | fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16)) 124 | for idx_plot, idx in enumerate(np.random.choice(n_obs, 5)): 125 | for idx_data, (dd, dd_numint, dd_innpro) in enumerate( 126 | zip(kl.data.data, data_recons_numint.data, data_recons_innpro.data) 127 | ): 128 | axes[idx_plot, idx_data] = plot( 129 | dd[idx], ax=axes[idx_plot, idx_data], label="True" 130 | ) 131 | axes[idx_plot, idx_data] = plot( 132 | dd_numint[idx], 133 | colors=colors_numint, 134 | ax=axes[idx_plot, idx_data], 135 | label="Reconstruction NumInt", 136 | ) 137 | axes[idx_plot, idx_data] = plot( 138 | dd_innpro[idx], 139 | colors=colors_innpro, 140 | ax=axes[idx_plot, idx_data], 141 | label="Reconstruction InnPro", 142 | ) 143 | axes[idx_plot, idx_data].legend() 144 | plt.show() 145 | -------------------------------------------------------------------------------- /examples/fpca/plot_mfpca_1d_2d.py: -------------------------------------------------------------------------------- 1 | """ 2 | MFPCA of 1- and 2-dimensional data 3 | ================================== 4 | 5 | """ 6 | 7 | 8 | # Author: Steven Golovkine 9 | # License: MIT 10 | 11 | # Load packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | 15 | from FDApy.representation import DenseArgvals 16 | from FDApy.simulation import KarhunenLoeve 17 | from FDApy.preprocessing import MFPCA 18 | from FDApy.visualization import plot, plot_multivariate 19 | 20 | ############################################################################### 21 | # In this section, we are showing how to perform a multivariate 
functional principal component analysis on one-dimensional and two-dimensional data using the :class:`~FDApy.preprocessing.MFPCA` class. We will compare two methods to perform the dimension reduction: the decomposition of the covariance operator and the decomposition of the inner-product matrix. We will use :math:`90\%` of the variance explained in the data to reconstruct the curves. 22 | 23 | # Set general parameters 24 | rng = 42 25 | n_obs = 50 26 | idx = 5 27 | 28 | # Parameters of the basis 29 | name = ["bsplines", ("fourier", "fourier")] 30 | n_functions = [9, (3, 3)] 31 | argvals = [ 32 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}), 33 | DenseArgvals( 34 | {"input_dim_0": np.linspace(0, 1, 21), "input_dim_1": np.linspace(0, 1, 21)} 35 | ), 36 | ] 37 | 38 | ############################################################################### 39 | # We simulate :math:`N = 50` curves of a 2-dimensional process. The first 40 | # component of the process is defined on the one-dimensional observation grid 41 | # :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` 42 | # B-splines basis functions on :math:`[0, 1]` and the variance of the scores 43 | # random variables equal to :math:`1`. The second component of the 44 | # process is defined on the two-dimensional observation grid 45 | # :math:`\{0, 0.05, 0.1, \cdots, 1\} \times \{0, 0.05, 0.1, \cdots, 1\}`, 46 | # based on the tensor product of the first :math:`K = 5` Fourier 47 | # basis functions on :math:`[0, 1] \times [0, 1]` and the variance of 48 | # the scores random variables equal to :math:`1`. 
49 | kl = KarhunenLoeve( 50 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 51 | ) 52 | kl.new(n_obs=50) 53 | data = kl.data 54 | 55 | _ = plot_multivariate(data) 56 | 57 | 58 | ############################################################################### 59 | # Estimation of the eigencomponents 60 | # --------------------------------- 61 | # 62 | # The :class:`~FDApy.preprocessing.MFPCA` class requires two parameters: the number of components to estimate and the method to use. The method parameter can be either `covariance` or `inner-product`. The first method estimates the eigenfunctions by decomposing the covariance operator, while the second method estimates the eigenfunctions by decomposing the inner-product matrix. In the case of a decomposition of the covariance operator, the method also requires the univariate expansions to estimate the eigenfunctions of each component. Here, we use the univariate functional principal component analysis with penalized splines to estimate the eigenfunctions of the first component and the FCP-TPA to estimate the eigenfunctions of the second component. 63 | 64 | # First, we perform a multivariate FPCA using a decomposition of the covariance operator. 65 | univariate_expansions = [ 66 | {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"}, 67 | {"method": "FCPTPA", "n_components": 20}, 68 | ] 69 | mfpca_cov = MFPCA( 70 | n_components=0.9, method="covariance", univariate_expansions=univariate_expansions 71 | ) 72 | mfpca_cov.fit(data) 73 | 74 | ############################################################################### 75 | # 76 | 77 | # Second, we perform a multivariate FPCA using a decomposition of the inner-product matrix. 
78 | mfpca_innpro = MFPCA(n_components=0.95, method="inner-product") 79 | mfpca_innpro.fit(data) 80 | 81 | 82 | ############################################################################### 83 | # Estimation of the scores 84 | # ------------------------ 85 | # 86 | # Once the eigenfunctions are estimated, we can compute the scores using numerical integration or the eigenvectors from the decomposition of the inner-product matrix. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data cannot be passed as argument of the :func:`~FDApy.preprocessing.MFPCA.transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 87 | 88 | scores_cov = mfpca_cov.transform(data, method="NumInt") 89 | scores_innpro = mfpca_innpro.transform(method="InnPro") 90 | 91 | # Plot of the scores 92 | _ = plt.scatter(scores_cov[:, 0], scores_cov[:, 1], label="NumInt") 93 | _ = plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label="InnPro") 94 | plt.legend() 95 | plt.show() 96 | 97 | 98 | ############################################################################### 99 | # Comparison of the methods 100 | # ------------------------- 101 | # 102 | # Finally, we compare the methods by reconstructing the curves using :math:`90\%` of the variance explained. 
103 | data_recons_cov = mfpca_cov.inverse_transform(scores_cov) 104 | data_recons_innpro = mfpca_innpro.inverse_transform(scores_innpro) 105 | 106 | 107 | ############################################################################### 108 | # 109 | indexes = np.random.choice(n_obs, 5) 110 | 111 | colors_numint = np.array([[0.9, 0, 0, 1]]) 112 | colors_pace = np.array([[0, 0.9, 0, 1]]) 113 | colors_innpro = np.array([[0.9, 0, 0.9, 1]]) 114 | 115 | fig, axes = plt.subplots(nrows=5, ncols=4, figsize=(16, 16)) 116 | for idx_plot, idx in enumerate(indexes): 117 | plot(data.data[0][idx], ax=axes[idx_plot, 0], label="True") 118 | plot( 119 | data_recons_cov.data[0][idx], 120 | colors=colors_numint, 121 | ax=axes[idx_plot, 0], 122 | label="Reconstruction NumInt", 123 | ) 124 | plot( 125 | data_recons_innpro.data[0][idx], 126 | colors=colors_innpro, 127 | ax=axes[idx_plot, 0], 128 | label="Reconstruction InnPro", 129 | ) 130 | axes[idx_plot, 0].legend() 131 | 132 | axes[idx_plot, 1] = plot(data.data[1][idx], ax=axes[idx_plot, 1]) 133 | axes[idx_plot, 1].set_title("True") 134 | 135 | axes[idx_plot, 2] = plot(data_recons_cov.data[1][idx], ax=axes[idx_plot, 2]) 136 | axes[idx_plot, 2].set_title("FCPTPA") 137 | 138 | axes[idx_plot, 3] = plot(data_recons_innpro.data[1][idx], ax=axes[idx_plot, 3]) 139 | axes[idx_plot, 3].set_title("InnPro") 140 | 141 | plt.show() 142 | -------------------------------------------------------------------------------- /examples/fpca/plot_mfpca_1d_sparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | MFPCA of 1-dimensional sparse data 3 | ================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.simulation import KarhunenLoeve 16 | from FDApy.preprocessing import MFPCA 17 | from FDApy.visualization 
import plot, plot_multivariate 18 | 19 | ############################################################################### 20 | # In this section, we are showing how to perform a multivariate functional principal component analysis on one-dimensional sparse data using the :class:`~FDApy.preprocessing.MFPCA` class. We will compare two methods to perform the dimension reduction: the decomposition of the covariance operator and the decomposition of the inner-product matrix. We will use the first :math:`K = 3` principal components to reconstruct the curves. 21 | 22 | # Set general parameters 23 | rng = 42 24 | n_obs = 50 25 | idx = 5 26 | colors = np.array([[0.5, 0, 0, 1]]) 27 | 28 | 29 | # Parameters of the basis 30 | name = ["bsplines", "fourier"] 31 | n_functions = [5, 5] 32 | argvals = [ 33 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}), 34 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}), 35 | ] 36 | 37 | ############################################################################### 38 | # We simulate :math:`N = 50` curves of a 2-dimensional process. The first component of the process is defined on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` B-splines basis functions on :math:`[0, 1]` and the variance of the scores random variables equal to :math:`1`. The second component of the process is defined on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` Fourier basis functions on :math:`[0, 1]` and the variance of the scores random variables equal to :math:`1`. 
39 | 40 | kl = KarhunenLoeve( 41 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 42 | ) 43 | kl.new(n_obs=n_obs) 44 | kl.add_noise_and_sparsify(noise_variance=0.05, percentage=0.5, epsilon=0.05) 45 | data = kl.sparse_data 46 | 47 | _ = plot_multivariate(data) 48 | 49 | 50 | ############################################################################### 51 | # Estimation of the eigencomponents 52 | # --------------------------------- 53 | # 54 | # The :class:`~FDApy.preprocessing.MFPCA` class requires two parameters: the number of components to estimate and the method to use. The method parameter can be either `covariance` or `inner-product`. The first method estimates the eigenfunctions by decomposing the covariance operator, while the second method estimates the eigenfunctions by decomposing the inner-product matrix. In the case of a decomposition of the covariance operator, the method also requires the univariate expansions to estimate the eigenfunctions of each component. Here, we use the univariate functional principal component analysis with penalized splines to estimate the eigenfunctions of each component. 55 | 56 | univariate_expansions = [ 57 | {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"}, 58 | {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"}, 59 | ] 60 | mfpca_cov = MFPCA( 61 | n_components=3, method="covariance", univariate_expansions=univariate_expansions 62 | ) 63 | mfpca_cov.fit(data, method_smoothing="PS") 64 | 65 | # Plot the eigenfunctions using the decomposition of the covariance operator. 66 | _ = plot_multivariate(mfpca_cov.eigenfunctions) 67 | plt.show() 68 | 69 | 70 | ############################################################################### 71 | # 72 | 73 | # Second, we perform a multivariate FPCA using a decomposition of the inner-product matrix. 
74 | mfpca_innpro = MFPCA(n_components=3, method="inner-product") 75 | mfpca_innpro.fit(data, method_smoothing="PS") 76 | 77 | # Plot the eigenfunctions using the decomposition of the inner-product matrix. 78 | _ = plot_multivariate(mfpca_innpro.eigenfunctions) 79 | plt.show() 80 | 81 | 82 | ############################################################################### 83 | # Estimation of the scores 84 | # ------------------------ 85 | # 86 | # Once the eigenfunctions are estimated, we can compute the scores using numerical integration or the eigenvectors from the decomposition of the inner-product matrix. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data can not be passed as argument of the :func:`~FDApy.preprocessing.MFPCA.transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 87 | scores_numint = mfpca_cov.transform(data, method="NumInt") 88 | scores_innpro = mfpca_innpro.transform(method="InnPro") 89 | 90 | # Plot of the scores 91 | _ = plt.scatter(scores_numint[:, 0], scores_numint[:, 1], label="NumInt") 92 | _ = plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label="InnPro") 93 | plt.legend() 94 | plt.show() 95 | 96 | 97 | ############################################################################### 98 | # Comparison of the methods 99 | # ------------------------- 100 | # 101 | # Finally, we compare the two methods by reconstructing the curves using the scores. We use the first :math:`K = 3` principal components to reconstruct the curves. 
102 | data_recons_numint = mfpca_cov.inverse_transform(scores_numint) 103 | data_recons_innpro = mfpca_innpro.inverse_transform(scores_innpro) 104 | 105 | ############################################################################### 106 | # 107 | colors_numint = np.array([[0.9, 0, 0, 1]]) 108 | colors_pace = np.array([[0, 0.9, 0, 1]]) 109 | colors_innpro = np.array([[0.9, 0, 0.9, 1]]) 110 | 111 | fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16)) 112 | for idx_plot, idx in enumerate(np.random.choice(n_obs, 5)): 113 | for idx_data, (dd, dd_numint, dd_innpro) in enumerate( 114 | zip(kl.data.data, data_recons_numint.data, data_recons_innpro.data) 115 | ): 116 | axes[idx_plot, idx_data] = plot( 117 | dd[idx], ax=axes[idx_plot, idx_data], label="True" 118 | ) 119 | axes[idx_plot, idx_data] = plot( 120 | dd_numint[idx], 121 | colors=colors_numint, 122 | ax=axes[idx_plot, idx_data], 123 | label="Reconstruction NumInt", 124 | ) 125 | axes[idx_plot, idx_data] = plot( 126 | dd_innpro[idx], 127 | colors=colors_innpro, 128 | ax=axes[idx_plot, idx_data], 129 | label="Reconstruction InnPro", 130 | ) 131 | axes[idx_plot, idx_data].legend() 132 | plt.show() 133 | -------------------------------------------------------------------------------- /examples/fpca/plot_mfpca_2d.py: -------------------------------------------------------------------------------- 1 | """ 2 | MFPCA of 2-dimensional data 3 | =========================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import DenseArgvals 15 | from FDApy.simulation import KarhunenLoeve 16 | from FDApy.preprocessing import MFPCA 17 | from FDApy.visualization import plot, plot_multivariate 18 | 19 | ############################################################################### 20 | # In this section, we are showing how to perform a multivariate functional principal component 
analysis on two-dimensional data using the :class:`~FDApy.preprocessing.MFPCA` class. We will compare two methods to perform the dimension reduction: the FCP-TPA and the decomposition of the inner-product matrix. We will use the first :math:`K = 5` principal components to reconstruct the curves. 21 | 22 | 23 | # Set general parameters 24 | rng = 42 25 | n_obs = 50 26 | idx = 5 27 | 28 | 29 | # Parameters of the basis 30 | name = [("bsplines", "bsplines"), ("fourier", "fourier")] 31 | n_functions = [(5, 5), (5, 5)] 32 | argvals = [ 33 | DenseArgvals( 34 | {"input_dim_0": np.linspace(0, 1, 21), "input_dim_1": np.linspace(0, 1, 21)} 35 | ), 36 | DenseArgvals( 37 | {"input_dim_0": np.linspace(0, 1, 21), "input_dim_1": np.linspace(0, 1, 21)} 38 | ), 39 | ] 40 | 41 | 42 | ############################################################################### 43 | # We simulate :math:`N = 50` curves of a 2-dimensional process. The first 44 | # component of the process is defined on the two-dimensional observation grid 45 | # :math:`\{0, 0.05, 0.1, \cdots, 1\} \times \{0, 0.05, 0.1, \cdots, 1\}`, 46 | # based on the tensor product of the first :math:`K = 5` B-splines 47 | # basis functions on :math:`[0, 1] \times [0, 1]` and the variance of 48 | # the scores random variables equal to :math:`1`. The second component of the 49 | # process is defined on the two-dimensional observation grid 50 | # :math:`\{0, 0.05, 0.1, \cdots, 1\} \times \{0, 0.05, 0.1, \cdots, 1\}`, 51 | # based on the tensor product of the first :math:`K = 5` Fourier 52 | # basis functions on :math:`[0, 1] \times [0, 1]` and the variance of 53 | # the scores random variables equal to :math:`1`. 
54 | kl = KarhunenLoeve( 55 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 56 | ) 57 | kl.new(n_obs=50) 58 | data = kl.data 59 | 60 | _ = plot_multivariate(data) 61 | 62 | 63 | ############################################################################### 64 | # Estimation of the eigencomponents 65 | # --------------------------------- 66 | # 67 | # The :class:`~FDApy.preprocessing.MFPCA` class requires two parameters: the number of components to estimate and the method to use. The method parameter can be either `covariance` or `inner-product`. The first method estimates the eigenfunctions by decomposing the covariance operator, while the second method estimates the eigenfunctions by decomposing the inner-product matrix. In the case of a decomposition of the covariance operator, the method also requires the univariate expansions to estimate the eigenfunctions of each component. Here, we use the FCP-TPA to estimate the eigenfunctions of each component. 68 | 69 | # First, we perform a multivariate FPCA using a decomposition of the covariance operator. 70 | univariate_expansions = [ 71 | {"method": "FCPTPA", "n_components": 20}, 72 | {"method": "FCPTPA", "n_components": 20}, 73 | ] 74 | mfpca_cov = MFPCA( 75 | n_components=5, method="covariance", univariate_expansions=univariate_expansions 76 | ) 77 | mfpca_cov.fit(data, method_smoothing="PS") 78 | 79 | ############################################################################### 80 | # 81 | 82 | # Second, we perform a multivariate FPCA using a decomposition of the inner-product matrix. 
83 | mfpca_innpro = MFPCA(n_components=5, method="inner-product") 84 | mfpca_innpro.fit(data, method_smoothing="PS") 85 | 86 | 87 | ############################################################################### 88 | # Estimation of the scores 89 | # ------------------------ 90 | # 91 | # Once the eigenfunctions are estimated, we can compute the scores using numerical integration or the eigenvectors from the decomposition of the inner-product matrix. Note that, when using the eigenvectors from the decomposition of the inner-product matrix, new data can not be passed as argument of the :func:`~FDApy.preprocessing.MFPCA.transform` method because the estimation is performed using the eigenvectors of the inner-product matrix. 92 | scores_cov = mfpca_cov.transform(data, method="NumInt") 93 | scores_innpro = mfpca_innpro.transform(method="InnPro") 94 | 95 | # Plot of the scores 96 | _ = plt.scatter(scores_cov[:, 0], scores_cov[:, 1], label="FCPTPA") 97 | _ = plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1], label="InnPro") 98 | plt.legend() 99 | plt.show() 100 | 101 | 102 | ############################################################################### 103 | # Comparison of the methods 104 | # ------------------------- 105 | # 106 | # Finally, we compare the reconstruction of the curves using the first :math:`K = 5` principal components. 
107 | data_recons_cov = mfpca_cov.inverse_transform(scores_cov) 108 | data_recons_innpro = mfpca_innpro.inverse_transform(scores_innpro) 109 | 110 | 111 | indexes = np.random.choice(n_obs, 5) 112 | # For the first component 113 | fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(16, 16)) 114 | for idx_plot, idx in enumerate(indexes): 115 | axes[idx_plot, 0] = plot(data.data[0][idx], ax=axes[idx_plot, 0]) 116 | axes[idx_plot, 0].set_title("True") 117 | 118 | axes[idx_plot, 1] = plot(data_recons_cov.data[0][idx], ax=axes[idx_plot, 1]) 119 | axes[idx_plot, 1].set_title("FCPTPA") 120 | 121 | axes[idx_plot, 2] = plot(data_recons_innpro.data[0][idx], ax=axes[idx_plot, 2]) 122 | axes[idx_plot, 2].set_title("InnPro") 123 | plt.show() 124 | 125 | # For the second component 126 | fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(16, 16)) 127 | for idx_plot, idx in enumerate(indexes): 128 | axes[idx_plot, 0] = plot(data.data[1][idx], ax=axes[idx_plot, 0]) 129 | axes[idx_plot, 0].set_title("True") 130 | 131 | axes[idx_plot, 1] = plot(data_recons_cov.data[1][idx], ax=axes[idx_plot, 1]) 132 | axes[idx_plot, 1].set_title("FCPTPA") 133 | 134 | axes[idx_plot, 2] = plot(data_recons_innpro.data[1][idx], ax=axes[idx_plot, 2]) 135 | axes[idx_plot, 2].set_title("InnPro") 136 | plt.show() 137 | -------------------------------------------------------------------------------- /examples/misc/README.rst: -------------------------------------------------------------------------------- 1 | .. _misc_examples: 2 | 3 | Miscellaneous 4 | ------------- 5 | 6 | These examples illustrate diverse functionalities of the package that are not directly related to the main modules and to functional data analysis. They could be useful for the user to understand how to use the package in a more general context. 
7 | 8 | -------------------------------------------------------------------------------- /examples/misc/plot_local_polynomials_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoothing of 1D data using local polynomial regression 3 | ====================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.preprocessing import LocalPolynomial 15 | 16 | ############################################################################### 17 | # The package includes a class to perform local polynomial regression. The class :class:`~FDApy.preprocessing.LocalPolynomial` allows to fit a local polynomial regression to a functional data object. Local polynomial regression is a non-parametric method that fits a polynomial to the data in a local neighborhood of each point. 18 | 19 | ############################################################################### 20 | # We will show how to use the class :class:`~FDApy.preprocessing.LocalPolynomial` to smooth a one-dimensional dataset. We will simulate a dataset from a cosine function and add some noise. The goal is to recover the cosine function by fitting a local polynomial regression. The :class:`~FDApy.preprocessing.LocalPolynomial` class requires the specification of the kernel, the bandwidth and the degree of the polynomial. The kernel is used to define the weights of the local regression. Four kernels are implemented: `gaussian`, `epanechnikov`, `tricube` and `bisquare`. The bandwidth is used to define the size of the local neighborhood. The degree of the polynomial is used to define the order of the polynomial to fit. If the degree is set to :math:`0`, the local regression is a local constant regression. If the degree is set to :math:`1`, the local regression is a local linear regression. 
If the degree is set to :math:`2`, the local regression is a local quadratic regression. 21 | 22 | # Set general parameters 23 | rng = 42 24 | rnorm = np.random.default_rng(rng).standard_normal 25 | n_points = 101 26 | 27 | # Simulate data 28 | x = rnorm(n_points) 29 | y = np.cos(x) + 0.2 * rnorm(n_points) 30 | x_new = np.linspace(-1, 1, 51) 31 | 32 | ############################################################################### 33 | # Here, we are interested in the influence of the degree of the polynomial on the local polynomial regression. We will fit a local polynomial regression with degree :math:`0`, :math:`1` and :math:`2`. The bandwidth is set to :math:`0.5` and the kernel is set to `epanechnikov`. We remark that the local polynomial regression with degree :math:`2` overfits the data, while the local polynomial regression with degree :math:`0` or :math:`1` roughly recover the cosine function. 34 | 35 | # Fit local polynomial regression with degree 0 36 | lp = LocalPolynomial(kernel_name="epanechnikov", bandwidth=0.5, degree=0) 37 | y_pred_0 = lp.predict(y=y, x=x, x_new=x_new) 38 | 39 | # Fit local polynomial regression with degree 1 40 | lp = LocalPolynomial(kernel_name="epanechnikov", bandwidth=0.5, degree=1) 41 | y_pred_1 = lp.predict(y=y, x=x, x_new=x_new) 42 | 43 | # Fit local polynomial regression with degree 2 44 | lp = LocalPolynomial(kernel_name="epanechnikov", bandwidth=0.5, degree=2) 45 | y_pred_2 = lp.predict(y=y, x=x, x_new=x_new) 46 | 47 | # Plot results 48 | plt.scatter(x, y, c="grey", alpha=0.2) 49 | plt.plot(np.sort(x), np.cos(np.sort(x)), c="k", label="True") 50 | plt.plot(x_new, y_pred_0, c="r", label="Degree 0") 51 | plt.plot(x_new, y_pred_1, c="g", label="Degree 1") 52 | plt.plot(x_new, y_pred_2, c="y", label="Degree 2") 53 | plt.legend() 54 | plt.show() 55 | 56 | 57 | ############################################################################### 58 | # Here, we are interested in the influence of the bandwidth on the local polynomial 
regression. We will fit a local polynomial regression with bandwidth :math:`0.2`, :math:`0.5` and :math:`0.8`. The degree is set to :math:`1` and the kernel is set to `epanechnikov`. We remark that the local polynomial regression with bandwidth :math:`0.2` overfits the data. The better fit is obtained with the local polynomial regression with bandwidth :math:`0.8`. 59 | 60 | # Fit local polynomial regression with bandwidth 0.2 61 | lp = LocalPolynomial(kernel_name="epanechnikov", bandwidth=0.2, degree=1) 62 | y_pred_0 = lp.predict(y=y, x=x, x_new=x_new) 63 | 64 | # Fit local polynomial regression with bandwidth 0.5 65 | lp = LocalPolynomial(kernel_name="epanechnikov", bandwidth=0.5, degree=1) 66 | y_pred_1 = lp.predict(y=y, x=x, x_new=x_new) 67 | 68 | # Fit local polynomial regression with bandwidth 0.8 69 | lp = LocalPolynomial(kernel_name="epanechnikov", bandwidth=0.8, degree=1) 70 | y_pred_2 = lp.predict(y=y, x=x, x_new=x_new) 71 | 72 | # Plot results 73 | plt.scatter(x, y, c="grey", alpha=0.2) 74 | plt.plot(np.sort(x), np.cos(np.sort(x)), c="k", label="True") 75 | plt.plot(x_new, y_pred_0, c="r", label="$\lambda = 0.2$") 76 | plt.plot(x_new, y_pred_1, c="g", label="$\lambda = 0.5$") 77 | plt.plot(x_new, y_pred_2, c="y", label="$\lambda = 0.8$") 78 | plt.legend() 79 | plt.show() 80 | -------------------------------------------------------------------------------- /examples/misc/plot_psplines_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoothing of 1D data using P-Splines 3 | ==================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.preprocessing import PSplines 15 | 16 | ############################################################################### 17 | # The package includes a class to perform P-Splines smoothing. 
The class :class:`~FDApy.preprocessing.PSplines` allows to fit a P-Splines regression to a functional data object. P-Splines regression is a non-parametric method that fits a spline to the data. The spline is defined by a basis of B-Splines. The B-Splines basis is defined by a set of knots. The P-Splines regression is a penalized regression that adds a discrete constraint to the fit. The influence of the penalty is controlled by the parameter `penalty`. 18 | # 19 | 20 | ############################################################################### 21 | # We will show how to use the class :class:`~FDApy.preprocessing.PSplines` to smooth a one-dimensional dataset. We will simulate a dataset from a cosine function and add some noise. The goal is to recover the cosine function by fitting a P-Splines regression. The :class:`~FDApy.preprocessing.PSplines` class requires the specification of the number of segments and the degree of the B-Splines basis. The number of segments is used to define the number of knots. The degree of the B-Splines basis is used to define the order of the B-Splines basis. If the degree is set to :math:`0`, the B-Splines basis is a set of step functions. If the degree is set to :math:`1`, the B-Splines basis is a set of piecewise linear functions. If the degree is set to :math:`2`, the B-Splines basis is a set of piecewise quadratic functions. To fit the model, the method :meth:`~FDApy.preprocessing.PSplines.fit` requires the data and the penalty. 22 | # 23 | 24 | # Set general parameters 25 | rng = 42 26 | rnorm = np.random.default_rng(rng).standard_normal 27 | n_points = 101 28 | 29 | # Simulate data 30 | x = np.sort(rnorm(n_points)) 31 | y = np.cos(x) + 0.2 * rnorm(n_points) 32 | x_new = np.linspace(-2, 2, 51) 33 | 34 | ############################################################################### 35 | # Here, we are interested in the influence of the degree of the B-Splines basis on the P-Splines regression. 
We will fit a P-Splines regression with degree :math:`0`, :math:`1` and :math:`2`. The number of segments is set to :math:`20` and the penalty is set to :math:`5`. We remark that the P-Splines regression with degree :math:`0` is not a good fit to data, while the P-Splines regression with degree :math:`1` or :math:`2` are roughly similar and recover the cosine function. 36 | # 37 | 38 | # Fit P-Splines regression with degree 0 39 | ps = PSplines(n_segments=20, degree=0) 40 | ps.fit(y, x, penalty=5) 41 | y_pred_0 = ps.predict(x_new) 42 | 43 | # Fit P-Splines regression with degree 1 44 | ps = PSplines(n_segments=20, degree=1) 45 | ps.fit(y, x, penalty=5) 46 | y_pred_1 = ps.predict(x_new) 47 | 48 | # Fit P-Splines regression with degree 2 49 | ps = PSplines(n_segments=20, degree=2) 50 | ps.fit(y, x, penalty=5) 51 | y_pred_2 = ps.predict(x_new) 52 | 53 | # Plot results 54 | plt.scatter(x, y, c="grey", alpha=0.2) 55 | plt.plot(np.sort(x), np.cos(np.sort(x)), c="k", label="True") 56 | plt.plot(x_new, y_pred_0, c="r", label="Degree 0") 57 | plt.plot(x_new, y_pred_1, c="g", label="Degree 1") 58 | plt.plot(x_new, y_pred_2, c="y", label="Degree 2") 59 | plt.legend() 60 | plt.show() 61 | 62 | ############################################################################### 63 | # Here, we are interested in the influence of the penalty on the P-Splines regression. We will fit a P-Splines regression with penalty :math:`10`, :math:`1` and :math:`0.1`. The number of segments is set to :math:`20` and the degree is set to :math:`3`. The better fit is obtained with the P-Splines regression with penalty :math:`10`. 
64 | # 65 | 66 | # Fit P-Splines regression with penalty=10 67 | ps = PSplines(n_segments=20, degree=3) 68 | ps.fit(y, x, penalty=10) 69 | y_pred_0 = ps.predict(x_new) 70 | 71 | # Fit P-Splines regression with penalty=1 72 | ps = PSplines(n_segments=20, degree=3) 73 | ps.fit(y, x, penalty=1) 74 | y_pred_1 = ps.predict(x_new) 75 | 76 | # Fit P-Splines regression with penalty=0.1 77 | ps = PSplines(n_segments=20, degree=3) 78 | ps.fit(y, x, penalty=0.1) 79 | y_pred_2 = ps.predict(x_new) 80 | 81 | # Plot results 82 | plt.scatter(x, y, c="grey", alpha=0.2) 83 | plt.plot(np.sort(x), np.cos(np.sort(x)), c="k", label="True") 84 | plt.plot(x_new, y_pred_0, c="r", label="$\lambda = 10$") 85 | plt.plot(x_new, y_pred_1, c="g", label="$\lambda = 1$") 86 | plt.plot(x_new, y_pred_2, c="y", label="$\lambda = 0.1$") 87 | plt.legend() 88 | plt.show() 89 | -------------------------------------------------------------------------------- /examples/representation/README.rst: -------------------------------------------------------------------------------- 1 | .. _representation_examples: 2 | 3 | Representation 4 | -------------- 5 | 6 | These examples illustrate the use of the :mod:`FDApy.representation` module. It contains various functionalities to represent functional data in different formats. 
7 | -------------------------------------------------------------------------------- /examples/representation/plot_basis_functional.py: -------------------------------------------------------------------------------- 1 | """ 2 | Representation of functional data using a basis 3 | =============================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.representation import Basis, BasisFunctionalData 15 | from FDApy.representation import DenseArgvals 16 | from FDApy.visualization import plot 17 | 18 | ############################################################################### 19 | # In this section, we are showing the building blocks of the representation of functional data using a basis. To define a :class:`~FDApy.representation.BasisFunctionalData` object, we need a :class:`~FDApy.representation.Basis` object and a set of coefficients. The basis object contains the information about the basis functions and the argvals. The coefficients are the weights of the basis functions. The basis functions are evaluated at the argvals and multiplied by the coefficients to obtain the functional data. 20 | 21 | 22 | ############################################################################### 23 | # For unidimensional functional data 24 | # ---------------------------------- 25 | # First, we will consider unidimensional functional data. We represent a functional data using a Fourier basis. The coefficients are drawn from a normal distribution. 
26 | 27 | n_functions = 5 28 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 29 | 30 | basis = Basis(name="fourier", n_functions=n_functions, argvals=argvals) 31 | coefs = np.random.normal(size=(3, n_functions)) 32 | 33 | data = BasisFunctionalData(basis=basis, coefficients=coefs) 34 | 35 | plot(data) 36 | plt.show() 37 | 38 | 39 | ############################################################################### 40 | # For two-dimensional functional data 41 | # ----------------------------------- 42 | # Second, we will consider two-dimensional functional data. We represent a functional data using a tensor product of two Fourier basis. The coefficients are drawn from a normal distribution. 43 | 44 | name = ("fourier", "fourier") 45 | n_functions = (5, 5) 46 | argvals = DenseArgvals( 47 | {"input_dim_0": np.linspace(0, 1, 101), "input_dim_1": np.linspace(0, 1, 51)} 48 | ) 49 | 50 | basis = Basis(name=name, n_functions=n_functions, argvals=argvals) 51 | coefs = np.random.normal(size=(3, np.prod(n_functions))) 52 | 53 | data_2d = BasisFunctionalData(basis=basis, coefficients=coefs) 54 | 55 | plot(data_2d) 56 | plt.show() 57 | -------------------------------------------------------------------------------- /examples/representation/plot_dense_functional.py: -------------------------------------------------------------------------------- 1 | """ 2 | Representation of univariate and dense functional data 3 | ====================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy import DenseFunctionalData 14 | from FDApy.representation import DenseArgvals, DenseValues 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # In this section, we are showing the building blocks of the representation of univariate and dense functional data. 
To define a :class:`~FDApy.representation.DenseFunctionalData` object, we need a set of :class:`~FDApy.representation.DenseArgvals` (the sampling points of the curves) and a set of :class:`~FDApy.representation.DenseValues` (the observed points of the curves). The sampling points of the functional data are defined as a dictionary where each entry is a one-dimensional numpy :class:`~numpy.ndarray` that represents an input dimension (one entry corresponds to curves, two entries correspond to surface, ...). The shape of the array of the first dimension would be :math:`(m_1,)`, the shape of the array of the second dimension would be :math:`(m_2,)` and so on. Curves will thus be sampled on :math:`m_1` points, surface will be sampled on :math:`m_1 \times m_2`, etc. The values of the functional data are defined as an :class:`~numpy.ndarray`. The shape of the array is :math:`(n, m_1, m_2, \dots)` where :math:`n` is the number of curves in the sample. 19 | 20 | ############################################################################### 21 | # For unidimensional functional data 22 | # ---------------------------------- 23 | # First, we will consider unidimensional dense functional data. We represent two observations of a functional data regularly sampled on a hundred points between :math:`0` and :math:`\pi`. The shape of the array of the values is :math:`(2, 100)`. The first dimension corresponds to the number of curves and the second dimension corresponds to the input dimension. 
24 | 25 | argvals = np.linspace(0, np.pi, num=100) 26 | X = np.array([np.sin(2 * np.pi * argvals), np.cos(2 * np.pi * argvals)]) 27 | 28 | fdata = DenseFunctionalData( 29 | argvals=DenseArgvals({"input_dim_0": argvals}), values=DenseValues(X) 30 | ) 31 | 32 | _ = plot(fdata) 33 | 34 | 35 | ############################################################################### 36 | # For two-dimensional functional data 37 | # ----------------------------------- 38 | # Second, we will consider two-dimensional dense functional data. We represent two observations of a functional data regularly sampled on a hundred points between :math:`0` and :math:`\pi` for each dimension. The shape of the array of the values is :math:`(2, 100, 100)`. The first dimension corresponds to the number of curves, the second dimension corresponds to the first input dimension and the third dimension corresponds to the second input dimension. 39 | 40 | argvals = np.linspace(0, np.pi, num=100) 41 | X = np.array( 42 | [ 43 | np.outer(np.sin(argvals), np.cos(argvals)), 44 | np.outer(np.sin(-argvals), np.cos(argvals)), 45 | ] 46 | ) 47 | 48 | fdata = DenseFunctionalData( 49 | argvals=DenseArgvals({"input_dim_0": argvals, "input_dim_1": argvals}), 50 | values=DenseValues(X), 51 | ) 52 | 53 | _ = plot(fdata) 54 | 55 | ############################################################################### 56 | # For higher-dimensional functional data 57 | # -------------------------------------- 58 | # It is possible to define functional data with more than two dimensions. All you have to do is to add more entries in the dictionary of the argvals and another dimension in the values array. However, no plotting function is available for data with more than two dimensions. 
59 | -------------------------------------------------------------------------------- /examples/representation/plot_irregular_functional.py: -------------------------------------------------------------------------------- 1 | """ 2 | Representation of univariate and irregular functional data 3 | ========================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy import IrregularFunctionalData 14 | from FDApy.representation import DenseArgvals, IrregularArgvals, IrregularValues 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # In this section, we are showing the building blocks of the representation of univariate and irregular functional data. To define a :class:`~FDApy.representation.IrregularFunctionalData` object, we need a set of :class:`~FDApy.representation.IrregularArgvals` (the sampling points of the curves) and a set of :class:`~FDApy.representation.IrregularValues` (the observed points of the curves). The sampling points of the data are defined as a dictionary where each entry corresponds to an observation. Each entry of the dictionary corresponds to the sampling points of one observation and is represented as a :class:`~FDApy.representation.DenseArgvals`. The values of the functional data are defined in a dictionary where each entry represents an observation as an :class:`~numpy.ndarray`. Each entry should have the same dimension has the corresponding entry in the :class:`~FDApy.representation.IrregularArgvals` dictionary. 19 | 20 | ############################################################################### 21 | # For unidimensional functional data 22 | # ---------------------------------- 23 | # First, we will define unidimensional irregular functional data. 
We represent two observations of a functional data irregularly sampled on a set of points. The first observation is sampled on :math:`20` points between :math:`0` and :math:`1` and the second observation is sampled on :math:`15` points between :math:`0.2` and :math:`0.8`. The values of the functional data is a dictionary where each entry corresponds to the observed values of one observation. 24 | 25 | argvals = IrregularArgvals( 26 | { 27 | 0: DenseArgvals({"input_dim_0": np.linspace(0, 1, num=20)}), 28 | 1: DenseArgvals({"input_dim_0": np.linspace(0.2, 0.8, num=15)}), 29 | } 30 | ) 31 | X = IrregularValues( 32 | { 33 | 0: np.sin(2 * np.pi * argvals[0]["input_dim_0"]), 34 | 1: np.cos(2 * np.pi * argvals[1]["input_dim_0"]), 35 | } 36 | ) 37 | 38 | fdata = IrregularFunctionalData(argvals=argvals, values=X) 39 | 40 | _ = plot(fdata) 41 | 42 | 43 | ############################################################################### 44 | # For two-dimensional functional data 45 | # ----------------------------------- 46 | # Second, we will consider two-dimensional functional data where the observations are not sampled on the same grid. We represent two observations of a functional data irregularly sampled on a set of points. The first observation is sampled on a grid of :math:`20 \times 20` sampling points and the second observation is sampled on a grid of :math:`15 \times 15` sampling points. The values of the functional data is a dictionary where each entry corresponds to the observed values of one observation.
47 | 48 | argvals = IrregularArgvals( 49 | { 50 | 0: DenseArgvals( 51 | { 52 | "input_dim_0": np.linspace(0, 1, num=20), 53 | "input_dim_1": np.linspace(0, 1, num=20), 54 | } 55 | ), 56 | 1: DenseArgvals( 57 | { 58 | "input_dim_0": np.linspace(0.2, 0.8, num=15), 59 | "input_dim_1": np.linspace(0.2, 0.8, num=15), 60 | } 61 | ), 62 | } 63 | ) 64 | X = IrregularValues( 65 | { 66 | 0: np.outer( 67 | np.sin(argvals[0]["input_dim_0"]), np.cos(argvals[0]["input_dim_1"]) 68 | ), 69 | 1: np.outer( 70 | np.sin(-argvals[1]["input_dim_0"]), np.cos(argvals[1]["input_dim_1"]) 71 | ), 72 | } 73 | ) 74 | 75 | fdata = IrregularFunctionalData(argvals=argvals, values=X) 76 | 77 | _ = plot(fdata) 78 | 79 | ############################################################################### 80 | # For higher-dimensional functional data 81 | # -------------------------------------- 82 | # It is possible to define functional data with more than two dimensions. All you have to do is to add more dimension in the argvals and another dimension in the values array. However, no plotting function is available for data with more than two dimensions. 
83 | -------------------------------------------------------------------------------- /examples/representation/plot_multivariate_functional.py: -------------------------------------------------------------------------------- 1 | """ 2 | Representation of multivariate functional data 3 | ============================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy import ( 14 | DenseFunctionalData, 15 | IrregularFunctionalData, 16 | MultivariateFunctionalData, 17 | ) 18 | from FDApy.representation import DenseArgvals, IrregularArgvals 19 | from FDApy.representation import DenseValues, IrregularValues 20 | from FDApy.visualization import plot_multivariate 21 | 22 | 23 | ############################################################################### 24 | # Multivariate functional data are defined as a list of univariate functional data and are represented with a :class:`~FDApy.representation.MultivariateFunctionalData` object. The univariate functional data can be of whatever dimension (curves, surfaces, ...) and dense, irregular or defined using a basis of function. There is no restriction on the number of elements in the list but each univariate element must have the same number of observations. It is possible to mix unidimensional and multidimensional functional data in the same list. 25 | 26 | ############################################################################### 27 | # First example 28 | # ------------- 29 | # First, we will define two univariate unidimensional dense functional data. Creating a list of these two objects, we can define a :class:`~FDApy.representation.MultivariateFunctionalData` object. We consider two observations of the multivariate functional data. The first feature is sampled on a hundred points between :math:`0` and :math:`\pi` and the second feature is sampled on fifty points between :math:`0` and :math:`1`. 
30 | 31 | argvals = np.linspace(0, np.pi, num=100) 32 | X = np.array([np.sin(2 * np.pi * argvals), np.cos(2 * np.pi * argvals)]) 33 | fdata_first = DenseFunctionalData( 34 | argvals=DenseArgvals({"input_dim_0": argvals}), values=DenseValues(X) 35 | ) 36 | 37 | argvals = np.linspace(0, 1, num=50) 38 | X = np.array([np.exp(-argvals), np.log(1 + argvals)]) 39 | fdata_second = DenseFunctionalData( 40 | argvals=DenseArgvals({"input_dim_0": argvals}), values=DenseValues(X) 41 | ) 42 | 43 | fdata = MultivariateFunctionalData([fdata_first, fdata_second]) 44 | 45 | _ = plot_multivariate(fdata) 46 | 47 | 48 | ############################################################################### 49 | # Second example 50 | # -------------- 51 | # Second, we will define a multivariate functional data with one univariate dense functional data and one univariate irregular functional data. Both univariate functional data are two-dimensional. We consider two observations of the multivariate functional data. For the first feature, the first observation is sampled on a grid of :math:`20 \times 20` sampling points and the second observation is sampled on a grid of :math:`15 \times 15` sampling points. For the second feature, the observations are sampled on a hundred points between :math:`0` and :math:`\pi` for each dimension.
52 | 53 | argvals = IrregularArgvals( 54 | { 55 | 0: DenseArgvals( 56 | { 57 | "input_dim_0": np.linspace(0, 1, num=20), 58 | "input_dim_1": np.linspace(0, 1, num=20), 59 | } 60 | ), 61 | 1: DenseArgvals( 62 | { 63 | "input_dim_0": np.linspace(0.2, 0.8, num=15), 64 | "input_dim_1": np.linspace(0.2, 0.8, num=15), 65 | } 66 | ), 67 | } 68 | ) 69 | X = IrregularValues( 70 | { 71 | 0: np.outer( 72 | np.sin(argvals[0]["input_dim_0"]), np.cos(argvals[0]["input_dim_1"]) 73 | ), 74 | 1: np.outer( 75 | np.sin(-argvals[1]["input_dim_0"]), np.cos(argvals[1]["input_dim_1"]) 76 | ), 77 | } 78 | ) 79 | 80 | fdata_first = IrregularFunctionalData(argvals=argvals, values=X) 81 | 82 | argvals = np.linspace(0, np.pi, num=100) 83 | X = np.array( 84 | [ 85 | np.outer(np.sin(argvals), np.cos(argvals)), 86 | np.outer(np.sin(-argvals), np.cos(argvals)), 87 | ] 88 | ) 89 | fdata_second = DenseFunctionalData( 90 | argvals=DenseArgvals({"input_dim_0": argvals, "input_dim_1": argvals}), 91 | values=DenseValues(X), 92 | ) 93 | 94 | fdata = MultivariateFunctionalData([fdata_first, fdata_second]) 95 | 96 | _ = plot_multivariate(fdata) 97 | -------------------------------------------------------------------------------- /examples/simulation/README.rst: -------------------------------------------------------------------------------- 1 | .. _simulation_examples: 2 | 3 | Simulation 4 | ---------- 5 | 6 | These examples illustrate the use of the :mod:`FDApy.simulation` module. It contains various functionalities to simulate functional data. 
-------------------------------------------------------------------------------- /examples/simulation/plot_brownian.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simulation of Brownian motion 3 | ============================= 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.simulation import Brownian 14 | from FDApy.visualization import plot 15 | 16 | ############################################################################### 17 | # The package provides a class to simulate different types of Brownian motion using the following class: :class:`~FDApy.simulation.Brownian`. The type of Brownian motion can be standard, geometric or fractional. 18 | 19 | 20 | # Set general parameters 21 | rng = 42 22 | n_obs = 10 23 | argvals = np.arange(0, 1.01, 0.01) 24 | 25 | # Set Brownian parameters 26 | init_point = 1.0 27 | mu = 1.0 28 | sigma = 0.5 29 | hurst = 0.8 30 | 31 | ############################################################################### 32 | # Standard Brownian motion 33 | # ------------------------ 34 | # 35 | # A standard Brownian motion is a stochastic process defined as 36 | # :math:`\{X_t\}_{t \geq 0}`. The process has the following properties: 37 | # 38 | # * :math:`\{X_t\}_{t \geq 0}` is a Gaussian process. 39 | # 40 | # * For :math:`s, t \geq 0`, :math:`\mathbb{E}(X_t) = 0` and :math:`\mathbb{E}(X_sX_t) = \min(s, t)`. 41 | # 42 | # * The function :math:`t \rightarrow X_t` is continuous with probability :math:`1`.
43 | # 44 | # To simulate a standard Brownian motion, you can use the following code: 45 | br = Brownian(name="standard", random_state=rng) 46 | br.new(n_obs=n_obs, argvals=argvals, init_point=init_point) 47 | 48 | _ = plot(br.data) 49 | 50 | 51 | ############################################################################### 52 | # Geometric Brownian motion 53 | # ------------------------- 54 | # 55 | # A geometric Brownian motion is a stochastic process :math:`\{X_t\}_{t \geq 0}` 56 | # in which the logarithm of the randomly varying quantity is a Brownian motion 57 | # with drift. 58 | # 59 | # The process :math:`\{X_t\}_{t \geq 0}` satisfies the following stochastic 60 | # differential equation: 61 | # 62 | # .. math:: 63 | #     dX_t = \mu X_t dt + \sigma X_t dW_t 64 | # 65 | # where :math:`\{W_t\}_{t \geq 0}` is a Brownian motion, :math:`\mu` is the 66 | # percentage drift and :math:`\sigma` is the percentage volatility. 67 | # 68 | # To simulate a geometric Brownian motion, you can use the following code: 69 | br = Brownian(name="geometric", random_state=rng) 70 | br.new(n_obs=n_obs, argvals=argvals, init_point=init_point, mu=mu, sigma=sigma) 71 | 72 | _ = plot(br.data) 73 | 74 | 75 | ############################################################################### 76 | # Fractional Brownian motion 77 | # -------------------------- 78 | # 79 | # A fractional Brownian motion is a stochastic process 80 | # :math:`\{X_t\}_{t \geq 0}` that generalizes Brownian motion. Let 81 | # :math:`H \in (0, 1)` be the Hurst parameter. The process has the following 82 | # properties: 83 | # 84 | # * :math:`\{X_t\}_{t \geq 0}` is a Gaussian process. 85 | # 86 | # * For :math:`s, t \geq 0`, :math:`\mathbb{E}(X_t) = 0` and :math:`\mathbb{E}(X_sX_t) = \frac{1}{2}\left(|s|^{2H} + |t|^{2H} - |s - t|^{2H}\right)`. 87 | # 88 | # * The function :math:`t \rightarrow X_t` is continuous with probability :math:`1`. 89 | # 90 | # The value of :math:`H` defines the process.
If :math:`H = 1/2`, :math:`\{X_t\} 91 | # _{t \geq 0}` is a Brownian motion. If :math:`H > 1/2`, the increments of 92 | # :math:`\{X_t\}_{t \geq 0}` are positively correlated. If :math:`H < 1/2`, the 93 | # increments of :math:`\{X_t\}_{t \geq 0}` are negatively correlated. 94 | # 95 | # To simulate a fractional Brownian motion, you can use the following code: 96 | br = Brownian(name="fractional", random_state=rng) 97 | br.new(n_obs=n_obs, argvals=argvals, hurst=hurst) 98 | 99 | _ = plot(br.data) 100 | -------------------------------------------------------------------------------- /examples/simulation/plot_cluster.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simulation of clusters of univariate functional data 3 | ==================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import DenseArgvals 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # The package provides a class to simulate clusters of univariate functional data based on the Karhunen-Loève decomposition. The class :class:`~FDApy.simulation.KarhunenLoeve` allows to simulate functional data based on the truncated Karhunen-Loève representation of a functional process. 19 | 20 | ############################################################################### 21 | # We simulate :math:`N = 20` curves on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 25` Fourier basis functions on :math:`[0, 1]`. The clusters are defined through the coefficients in the Karhunen-Loève decomposition and parametrized using the `centers` parameter.
The centers of the clusters are generated as Gaussian random variables with parameters defined by a `mean` and a `covariance`. We also consider an exponential decreasing of the eigenvalues. 22 | 23 | 24 | # Set general parameters 25 | rng = 42 26 | n_obs = 20 27 | 28 | # Define the random state 29 | random_state = np.random.default_rng(rng) 30 | 31 | # Parameters of the basis 32 | name = "fourier" 33 | n_functions = 25 34 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 35 | 36 | # Parameters of the clusters 37 | n_clusters = 2 38 | mean = np.array([0, 0]) 39 | covariance = np.array([[1, -0.6], [-0.6, 1]]) 40 | centers = random_state.multivariate_normal(mean, covariance, size=n_functions) 41 | 42 | kl = KarhunenLoeve( 43 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 44 | ) 45 | kl.new(n_obs=n_obs, n_clusters=n_clusters, centers=centers, cluster_std="exponential") 46 | 47 | _ = plot(kl.data, kl.labels) 48 | -------------------------------------------------------------------------------- /examples/simulation/plot_cluster_multivariate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simulation of clusters of multivariate functional data 3 | ====================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import DenseArgvals 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.visualization import plot_multivariate 16 | 17 | ############################################################################### 18 | # Similarly to the univariate case, the package provides a class to simulate clusters of multivariate functional data based on the Karhunen-Loève decomposition. 
The class :class:`~FDApy.simulation.KarhunenLoeve` allows to simulate functional data based on the truncated Karhunen-Loève representation of a functional process. 19 | 20 | # Set general parameters 21 | rng = 42 22 | n_obs = 20 23 | 24 | # Define the random state 25 | random_state = np.random.default_rng(rng) 26 | 27 | # Parameters of the basis 28 | name = ["fourier", "wiener"] 29 | n_functions = [5, 5] 30 | argvals = [ 31 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}), 32 | DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}), 33 | ] 34 | 35 | # Parameters of the clusters 36 | n_clusters = 2 37 | mean = np.array([0, 0]) 38 | covariance = np.array([[1, -0.6], [-0.6, 1]]) 39 | centers = random_state.multivariate_normal(mean, covariance, size=n_functions[0]) 40 | 41 | ############################################################################### 42 | # We simulate :math:`N = 20` curves of a multivariate process. The first component of the process is defined on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` Fourier basis functions on :math:`[0, 1]` and the decreasing of the variance of the scores is exponential. The second component of the process is defined on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first :math:`K = 5` Wiener basis functions on :math:`[0, 1]` and the decreasing of the variance of the scores is exponential. The clusters are defined through the coefficients in the Karhunen-Loève decomposition. The centers of the clusters are generated as Gaussian random variables with parameters defined by `mean` and `covariance`. 
43 | 44 | kl = KarhunenLoeve( 45 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 46 | ) 47 | kl.new(n_obs=n_obs, n_clusters=n_clusters, centers=centers, clusters_std="exponential") 48 | 49 | _ = plot_multivariate(kl.data, kl.labels) 50 | -------------------------------------------------------------------------------- /examples/simulation/plot_karhunen.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simulation using Karhunen-Loève decomposition 3 | ============================================= 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import DenseArgvals 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # The simulation of univariate functional data 19 | # :math:`X: \mathcal{T} \rightarrow \mathbb{R}` is based on the truncated 20 | # Karhunen-Loève representation of :math:`X`. Consider that the representation 21 | # is truncated at :math:`K` components, then, for a particular realization 22 | # :math:`i` of the process :math:`X`: 23 | # 24 | # .. math:: 25 | # X_i(t) = \mu(t) + \sum_{k = 1}^K c_{i, k}\phi_k(t), 26 | # \quad t \in \mathcal{T} 27 | # 28 | # with a common mean function :math:`\mu(t)` and eigenfunctions 29 | # :math:`\phi_k, k = 1, \cdots, K`. The scores :math:`c_{i, k}` are the 30 | # projection of the curves :math:`X_i` onto the eigenfunctions :math:`\phi_k`. 31 | # These scores are random variables with mean :math:`0` and variance 32 | # :math:`\lambda_k`, which are the eigenvalues associated to each 33 | # eigenfunctions and that decreases toward :math:`0` when :math:`k` goes to 34 | # infinity. This representation is valid for domains of arbitrary dimension, 35 | # such as images (:math:`\mathcal{T} = \mathbb{R}^2`). 
36 | 37 | 38 | # Set general parameters 39 | rng = 42 40 | n_obs = 10 41 | 42 | # Parameters of the basis 43 | name = "fourier" 44 | n_functions = 25 45 | argvals = DenseArgvals({"input_dim_0": np.arange(0, 10.01, 0.01)}) 46 | 47 | ############################################################################### 48 | # Simulation for one-dimensional curve 49 | # ------------------------------------ 50 | # 51 | # **First example** 52 | # --- 53 | # We simulate :math:`N = 10` curves on the one-dimensional observation grid 54 | # :math:`\{0, 0.1, 0.2, \cdots, 1\}` (default), based on the first 55 | # :math:`K = 25` Fourier basis functions on :math:`[0, 1]` and the variance of 56 | # the scores random variables equal to :math:`1` (default). 57 | kl = KarhunenLoeve( 58 | basis_name=name, n_functions=n_functions, random_state=rng 59 | ) 60 | kl.new(n_obs=n_obs) 61 | 62 | _ = plot(kl.data) 63 | 64 | ############################################################################### 65 | # **Second example** 66 | # --- 67 | # We simulate :math:`N = 10` curves on the one-dimensional observation grid 68 | # :math:`\{0, 0.01, 0.02, \cdots, 10\}`, based on the first 69 | # :math:`K = 25` Fourier basis functions on :math:`[0, 10]` and the variance of 70 | # the scores random variables equal to :math:`1` (default). 71 | kl = KarhunenLoeve( 72 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 73 | ) 74 | kl.new(n_obs=n_obs) 75 | 76 | _ = plot(kl.data) 77 | 78 | ############################################################################### 79 | # **Third example** 80 | # --- 81 | # We simulate :math:`N = 10` curves on the one-dimensional observation grid 82 | # :math:`\{0, 0.01, 0.02, \cdots, 10\}` (default), based on the first 83 | # :math:`K = 25` Fourier basis functions on :math:`[0, 1]` and the decreasing 84 | # of the variance of the scores is exponential. 
85 | kl = KarhunenLoeve( 86 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 87 | ) 88 | kl.new(n_obs=n_obs, clusters_std="exponential") 89 | 90 | _ = plot(kl.data) 91 | 92 | ############################################################################### 93 | # Simulation for two-dimensional curve (image) 94 | # -------------------------------------------- 95 | # 96 | # For the simulation on a two-dimensional domain, we construct an 97 | # two-dimensional eigenbasis based on tensor products of univariate eigenbasis. 98 | # 99 | # **First example** 100 | # --- 101 | # We simulate :math:`N = 1` image on the two-dimensional observation grid 102 | # :math:`\{0, 0.01, 0.02, \cdots, 10\} \times \{0, 0.01, 0.02, \cdots, 10\}` 103 | # (default), based on the tensor product of the first :math:`K = 25` Fourier 104 | # basis functions on :math:`[0, 10] \times [0, 10]` and the variance of 105 | # the scores random variables equal to :math:`1` (default). 106 | 107 | # Parameters of the basis 108 | name = ("fourier", "fourier") 109 | n_functions = (5, 5) 110 | argvals = DenseArgvals( 111 | {"input_dim_0": np.arange(0, 10.01, 0.01), "input_dim_1": np.arange(0, 10.01, 0.01)} 112 | ) 113 | 114 | kl = KarhunenLoeve( 115 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 116 | ) 117 | kl.new(n_obs=1) 118 | 119 | _ = plot(kl.data) 120 | 121 | ############################################################################### 122 | # **Second example** 123 | # --- 124 | # We simulate :math:`N = 1` image on the two-dimensional observation grid 125 | # :math:`\{0, 0.01, 0.02, \cdots, 1\} \times \{0, 0.01, 0.02, \cdots, 1\}` 126 | # (default), based on the tensor product of the first :math:`K = 25` Fourier 127 | # basis functions on :math:`[0, 1] \times [0, 1]` and the decreasing 128 | # of the variance of the scores is linear. 
129 | kl = KarhunenLoeve( 130 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 131 | ) 132 | kl.new(n_obs=1, clusters_std="linear") 133 | 134 | _ = plot(kl.data) 135 | -------------------------------------------------------------------------------- /examples/simulation/plot_karhunen_multivariate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simulation using multivariate Karhunen-Loève decomposition 3 | ========================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import DenseArgvals 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.visualization import plot_multivariate 16 | 17 | ############################################################################### 18 | # Multivariate functional data consist of independent trajectories of a 19 | # vector-valued stochastic process 20 | # :math:`X = (X^{(1)}, \dots, X^{(P)})^\top, P \geq 1`. Each coordinate 21 | # :math:`X^{(p)}: \mathcal{T}_p \rightarrow \mathbb{R}` is assumed to be a 22 | # squared-integrable real-valued functions defined on :math:`\mathcal{T}_p`. 23 | # The simulation of multivariate functional data 24 | # :math:`X` is based on the truncated multivariate Karhunen-Loève 25 | # representation of :math:`X`. For a particular realization :math:`i` of the 26 | # process :math:`X`: 27 | # 28 | # .. math:: 29 | # X_i(t) = \mu(t) + \sum_{k = 1}^{K} c_{i,k}\phi_k(t), 30 | # \quad t \in \mathcal{T} 31 | # 32 | # with a common mean function :math:`\mu(t)` and eigenfunctions 33 | # :math:`\phi_k, k = 1, \cdots, K`. The scores :math:`c_{i, k}` 34 | # are the projection of the curves :math:`X_i` onto the eigenfunctions 35 | # :math:`\phi_k`. 
These scores are random variables with mean :math:`0` 36 | # and variance :math:`\lambda_k`, which are the eigenvalues associated to each 37 | # eigenfunctions and that decreases toward :math:`0` when :math:`k` goes to 38 | # infinity. This representation is valid for domains of arbitrary dimension, 39 | # such as images (:math:`\mathcal{T} = \mathbb{R}^2`). 40 | 41 | 42 | # Set general parameters 43 | rng = 42 44 | n_obs = 10 45 | 46 | 47 | # Parameters of the basis 48 | name = ["fourier", "bsplines"] 49 | n_functions = [5, 5] 50 | argvals = [ 51 | DenseArgvals({"input_dim_0": np.arange(0, 10.01, 0.01)}), 52 | DenseArgvals({"input_dim_0": np.arange(-0.5, 0.51, 0.01)}), 53 | ] 54 | 55 | ############################################################################### 56 | # Simulation for one-dimensional curve 57 | # ------------------------------------ 58 | # 59 | # **First example** 60 | # --- 61 | # We simulate :math:`N = 10` curves of a 2-dimensional process. The first component of the process is defined on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 10\}`, based on the first :math:`K = 5` Fourier basis functions on :math:`[0, 10]` and the variance of the scores random variables equal to :math:`1` (default). The second component of the process is defined on the one-dimensional observation grid :math:`\{-0.5, -0.49, -0.48, \cdots, 0.5\}`, based on the first :math:`K = 5` B-splines basis functions on :math:`[0, 10]` and the variance of the scores random variables equal to :math:`1` (default). 62 | kl = KarhunenLoeve( 63 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 64 | ) 65 | kl.new(n_obs=n_obs) 66 | 67 | _ = plot_multivariate(kl.data) 68 | 69 | ############################################################################### 70 | # **Second example** 71 | # --- 72 | # We simulate :math:`N = 10` curves of a 2-dimensional process. 
The first component of the process is defined on the one-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 10\}`, based on the first :math:`K = 5` Fourier basis functions on :math:`[0, 1]` and the decreasing of the variance of the scores is exponential. The second component of the process is defined on the one-dimensional observation grid :math:`\{-0.5, -0.49, -0.48, \cdots, 0.5\}`, based on the first :math:`K = 5` B-splines basis functions on :math:`[0, 1]` and the decreasing of the variance of the scores is exponential. 73 | kl = KarhunenLoeve( 74 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 75 | ) 76 | kl.new(n_obs=n_obs, clusters_std="exponential") 77 | 78 | _ = plot_multivariate(kl.data) 79 | 80 | ############################################################################### 81 | # Simulation for two-dimensional curve (image) 82 | # -------------------------------------------- 83 | # 84 | # We simulate a 2-dimensional process where the first component is a surface and the second component is a curve. For the simulation on a two-dimensional domain, we construct a two-dimensional eigenbasis based on tensor products of univariate eigenbasis. 85 | 86 | # Parameters of the basis 87 | name = [("fourier", "fourier"), "bsplines"] 88 | n_functions = [(5, 5), 25] 89 | argvals = [ 90 | DenseArgvals( 91 | { 92 | "input_dim_0": np.arange(0, 10.01, 0.01), 93 | "input_dim_1": np.arange(0, 10.01, 0.01), 94 | } 95 | ), 96 | DenseArgvals({"input_dim_0": np.arange(-0.5, 0.51, 0.01)}), 97 | ] 98 | 99 | ############################################################################### 100 | # **First example** 101 | # --- 102 | # We simulate :math:`N = 1` curves of a 2-dimensional process.
The first component of the process is defined on the two-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 10\} \times \{0, 0.01, 0.02, \cdots, 10\}`, based on the tensor product of the first :math:`K = 25` Fourier basis functions on :math:`[0, 1]` and the variance of the scores random variables equal to :math:`1` (default). The second component of the process is defined on the one-dimensional observation grid :math:`\{-0.5, -0.49, -0.48, \cdots, 0.5\}`, based on the first :math:`K = 25` B-splines basis functions on :math:`[0, 1]` and the variance of the scores random variables equal to :math:`1` (default). 103 | kl = KarhunenLoeve( 104 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 105 | ) 106 | kl.new(n_obs=1) 107 | 108 | _ = plot_multivariate(kl.data) 109 | 110 | ############################################################################### 111 | # **Second example** 112 | # --- 113 | # We simulate :math:`N = 1` curves of a 2-dimensional process. The first component of the process is defined on the two-dimensional observation grid :math:`\{0, 0.01, 0.02, \cdots, 10\} \times \{0, 0.01, 0.02, \cdots, 10\}`, based on the tensor product of the first :math:`K = 25` Fourier basis functions on :math:`[0, 1]` and the decreasing of the variance of the scores is linear. The second component of the process is defined on the one-dimensional observation grid :math:`\{-0.5, -0.49, -0.48, \cdots, 0.5\}` , based on the first :math:`K = 25` B-splines basis functions on :math:`[0, 1]` and the decreasing of the variance of the scores is linear. 
114 | kl = KarhunenLoeve( 115 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 116 | ) 117 | kl.new(n_obs=1, clusters_std="linear") 118 | 119 | _ = plot_multivariate(kl.data) 120 | -------------------------------------------------------------------------------- /examples/simulation/plot_simulation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simulation of functional data 3 | ============================= 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import DenseArgvals 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # Two main features of functional data are noise and sparsity. The package provides 19 | # methods to simulate noisy and sparse functional data. In this example, we will 20 | # simulate noisy and sparse functional data using the Karhunen-Loève decomposition. 21 | 22 | 23 | # Set general parameters 24 | rng = 42 25 | n_obs = 10 26 | 27 | # Parameters of the basis 28 | name = "bsplines" 29 | n_functions = 5 30 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 31 | 32 | 33 | ############################################################################### 34 | # For one dimensional data 35 | # ------------------------ 36 | # 37 | # We simulate :math:`N = 10` curves on the one-dimensional observation grid 38 | # :math:`\{0, 0.01, 0.02, \cdots, 1\}`, based on the first 39 | # :math:`K = 5` B-splines basis functions on :math:`[0, 1]` and the variance of 40 | # the scores random variables equal to :math:`1`. 
41 | kl = KarhunenLoeve( 42 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 43 | ) 44 | kl.new(n_obs=n_obs) 45 | 46 | _ = plot(kl.data) 47 | 48 | ############################################################################### 49 | # **Adding noise** 50 | # --- 51 | # We can generate a noisy version of the functional data by adding i.i.d. 52 | # realizations of the random variable 53 | # :math:`\varepsilon \sim \mathcal{N}(0, \sigma^2)` to the observation. In this 54 | # example, we set :math:`\sigma^2 = 0.05`. 55 | 56 | # Add some noise to the simulation. 57 | kl.add_noise(0.05) 58 | 59 | # Plot the noisy simulations 60 | _ = plot(kl.noisy_data) 61 | 62 | ############################################################################### 63 | # **Sparsification** 64 | # --- 65 | # We can generate a sparsified version of the functional data object by 66 | # randomly removing a certain percentage of the sampling points. The percentage 67 | # of retained sampling points can be supplied by the user. In this example, the 68 | # retained number of observations will be different for each curve and be 69 | # randomly drawn between :math:`0.45` and :math:`0.55` (percentage :math:`\pm` epsilon). 70 | 71 | # Sparsify the data 72 | kl.sparsify(percentage=0.5, epsilon=0.05) 73 | 74 | _ = plot(kl.sparse_data) 75 | 76 | 77 | ############################################################################### 78 | # For two dimensional data 79 | # ------------------------ 80 | # We simulate :math:`N = 1` image on the two-dimensional observation grid 81 | # :math:`\{0, 0.01, 0.02, \cdots, 1\} \times \{0, 0.01, 0.02, \cdots, 1\}`, 82 | # based on the tensor product of the first :math:`K = 25` B-splines 83 | # basis functions on :math:`[0, 1] \times [0, 1]` and the variance of 84 | # the scores random variables equal to :math:`1`.
85 | 86 | # Parameters of the basis 87 | name = ("bsplines", "bsplines") 88 | n_functions = (5, 5) 89 | argvals = DenseArgvals( 90 | {"input_dim_0": np.linspace(0, 1, 101), "input_dim_1": np.linspace(0, 1, 101)} 91 | ) 92 | 93 | kl = KarhunenLoeve( 94 | basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng 95 | ) 96 | kl.new(n_obs=1) 97 | 98 | _ = plot(kl.data) 99 | 100 | ############################################################################### 101 | # **Adding noise** 102 | # --- 103 | # We can generate a noisy version of the functional data by adding i.i.d. 104 | # realizations of the random variable 105 | # :math:`\varepsilon \sim \mathcal{N}(0, \sigma^2)` to the observation. In this 106 | # example, we set :math:`\sigma^2 = 0.05`. 107 | 108 | # Add some noise to the simulation. 109 | kl.add_noise(0.05) 110 | 111 | # Plot the noisy simulations 112 | _ = plot(kl.noisy_data) 113 | 114 | ############################################################################### 115 | # **Sparsification** 116 | # --- 117 | # The sparsification is not implemented for two-dimensional (and higher) data. 118 | -------------------------------------------------------------------------------- /examples/smoothing/README.rst: -------------------------------------------------------------------------------- 1 | .. _smoothing_examples: 2 | 3 | Smoothing 4 | --------- 5 | 6 | These examples illustrate the use of the :mod:`FDApy.preprocessing.smoothing` module. It contains various functionalities to smooth functional data.
7 | -------------------------------------------------------------------------------- /examples/smoothing/plot_smooth_data_1d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoothing of dense one-dimensional functional data 3 | ================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.representation import DenseArgvals 16 | from FDApy.visualization import plot 17 | 18 | ############################################################################### 19 | # The package includes different smoothing methods to smooth functional data. In this section, we are showing the building blocks of the smoothing of dense unidimensional functional data. First, we simulate functional data using the Karhunen-Loève decomposition using B-splines basis functions. We then add some noise to the simulation. 20 | 21 | 22 | # Set general parameters 23 | rng = 42 24 | n_obs = 4 25 | 26 | # Parameters of the basis 27 | name = "bsplines" 28 | n_functions = 5 29 | points = DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}) 30 | 31 | # Simulate data 32 | kl = KarhunenLoeve( 33 | basis_name=name, 34 | n_functions=n_functions, 35 | argvals=points, 36 | random_state=rng 37 | ) 38 | kl.new(n_obs=n_obs) 39 | 40 | # Add some noise to the simulation. 41 | kl.add_noise(0.05) 42 | 43 | 44 | ############################################################################## 45 | # We will smooth the noisy data using the :func:`~FDApy.representation.DenseFunctionalData.smooth` function. This function allows to smooth the data using different methods such as local polynomials and P-splines. In this example, we will use the local polynomials smoothing method with an Epanechnikov kernel and a bandwidth of :math:`0.1`. 
In the plot, the red line represents the true data, the grey line represents the noisy data and the blue line represents the smoothed data. 46 | 47 | # Smooth the data 48 | kernel_name = "epanechnikov" 49 | bandwidth = 0.1 50 | degree = 1 51 | 52 | fdata_smooth = kl.noisy_data.smooth( 53 | points=points, 54 | method="LP", 55 | kernel_name=kernel_name, 56 | bandwidth=bandwidth, 57 | degree=degree, 58 | ) 59 | 60 | # Plot results 61 | fig, axes = plt.subplots(2, 2, figsize=(10, 8)) 62 | for idx, ax in enumerate(axes.flat): 63 | plot(kl.noisy_data[idx], colors="k", alpha=0.2, ax=ax) 64 | plot(kl.data[idx], colors="r", ax=ax) 65 | plot(fdata_smooth[idx], colors="b", ax=ax) 66 | ax.set_title(f"Observation {idx + 1}") 67 | 68 | plt.show() 69 | -------------------------------------------------------------------------------- /examples/smoothing/plot_smooth_data_2d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoothing of dense two-dimensional functional data 3 | ================================================== 4 | 5 | """ 6 | 7 | # Author: Steven Golovkine 8 | # License: MIT 9 | 10 | # Load packages 11 | import numpy as np 12 | 13 | from FDApy.representation import DenseArgvals 14 | from FDApy.simulation import KarhunenLoeve 15 | from FDApy.visualization import plot 16 | 17 | ############################################################################### 18 | # The package includes different smoothing methods to smooth functional data. In this section, we are showing the building blocks of the smoothing of dense two-dimensional functional data. First, we simulate functional data using the Karhunen-Loève decomposition using B-splines basis functions. We then add some noise to the simulation. 
19 | 20 | # Set general parameters 21 | rng = 42 22 | n_obs = 4 23 | 24 | # Parameters of the basis 25 | name = ("bsplines", "bsplines") 26 | n_functions = (5, 5) 27 | 28 | argvals = DenseArgvals({ 29 | "input_dim_0": np.linspace(0, 1, 51), 30 | "input_dim_1": np.linspace(0, 1, 51) 31 | }) 32 | 33 | 34 | kl = KarhunenLoeve( 35 | basis_name=name, argvals=argvals, n_functions=n_functions, random_state=rng 36 | ) 37 | kl.new(n_obs=n_obs) 38 | data = kl.data 39 | 40 | # Add some noise to the simulation. 41 | kl.add_noise(0.05) 42 | 43 | ############################################################################### 44 | # Smoothing two-dimensional functional data is similar to smoothing one-dimensional functional data. The main difference is that the smoothing is done in two dimensions. In this example, we will smooth the noisy data using the :func:`~FDApy.representation.DenseFunctionalData.smooth` function. This function allows to smooth the data using different methods such as local polynomials and P-splines. In this example, we will use the local polynomials smoothing method with an Epanechnikov kernel and a bandwidth of :math:`0.5`. We plot the smoothed data. 
45 | 46 | # Smooth the data 47 | points = DenseArgvals({ 48 | "input_dim_0": np.linspace(0, 1, 11), 49 | "input_dim_1": np.linspace(0, 1, 11) 50 | }) 51 | kernel_name = "epanechnikov" 52 | bandwidth = 0.5 53 | degree = 1 54 | 55 | data_smooth = kl.noisy_data.smooth( 56 | points=points, 57 | method="LP", 58 | kernel_name=kernel_name, 59 | bandwidth=bandwidth, 60 | degree=degree, 61 | ) 62 | 63 | _ = plot(data_smooth) 64 | -------------------------------------------------------------------------------- /joss/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'FDApy: a Python package for functional data' 3 | tags: 4 | - functional data analysis 5 | - multivariate functional data 6 | - open source 7 | - Python 8 | authors: 9 | - name: Steven Golovkine 10 | orcid: 0000-0002-5994-2671 11 | affiliation: 1 12 | affiliations: 13 | - name: MACSI, Department of Mathematics and Statistics, University of Limerick, Limerick, Ireland 14 | index: 1 15 | ror: 00a0n9e72 16 | date: 28 October 2024 17 | bibliography: paper.bib 18 | 19 | --- 20 | 21 | # Summary 22 | 23 | Functional data analysis (FDA) is a statistical methodology for analyzing data that can be characterized as functions. These functions could represent measurements taken over time, space, frequency, probability, etc. The goal of FDA is to extract meaningful information from these functions and to model their behavior. See, e.g., @ramsayFunctionalDataAnalysis2005, @horvathInferenceFunctionalData2012a, and @kokoszkaIntroductionFunctionalData2017 for some references on FDA. 
FDA has been successfully applied in different contexts, such as identifying patterns of movements in sport biomechanics [@warmenhovenBivariateFunctionalPrincipal2019], analyzing changes in brain activity in neuroscience [@songSparseMultivariateFunctional2022], fault detection of batch processes [@wangFaultDetectionBatch2015] or in autonomous driving [@golovkineClusteringMultivariateFunctional2022]. In this paper, we introduce `FDApy`, a library developed for the FDA community and Python users, designed to facilitate the manipulation and processing of (multivariate) functional data. 24 | 25 | 26 | # Statement of need 27 | 28 | In order to apply FDA to real datasets, there is a need for appropriate software with up-to-date methodological implementations and easy addition of new theoretical developments. The seminal R package for FDA is `fda` [@ramsayFdaFunctionalData2023], based on work cited in @ramsayFunctionalDataAnalysis2005 and @ramsayFunctionalDataAnalysis2009. Most of the R packages that implement FDA methods are highly specialized and are built upon `fda`. For example, one may cite `FDboost` [@brockhausBoostingFunctionalRegression2020] and `refund` [@goldsmithRefundRegressionFunctional2023] for regression and classification, `funFEM` [@bouveyronFunFEMClusteringDiscriminative2021] and `funLBM` [@bouveyronFunLBMModelBasedCoClustering2022] for clustering or `fdasrvf` [@tuckerFdasrvfElasticFunctional2023] for functional data registration. For most packages, the functional data are however restricted to univariate functional data that are well described by their coefficients in a given basis of functions. The `funData` package [@happ-kurzObjectOrientedSoftwareFunctional2020] aims to provide a unified framework to handle univariate and multivariate functional data defined on different dimensional domains. Sparse functional data are also considered.
The `MFPCA` [@happ-kurzMFPCAMultivariateFunctional2022] package, built on top of the `funData` package, implements multivariate functional principal components analysis (MFPCA) for data defined on different dimensional domains [@happMultivariateFunctionalPrincipal2018]. Consider looking at the CRAN webpage\footnote{\url{https://cran.r-project.org/web/views/FunctionalData.html}} on functional data to have a complete overview of the R packages. 29 | 30 | Concerning the Python community, there are only a few packages that are related to FDA. One may cite `sktime` [@loningSktimeSktimeV02022] and `tslearn` [@tavenardTslearnMachineLearning2020] that provide tools for the analysis of time series as a `scikit-learn` compatible API. They implement specific time series methods such as DTW-based ones or shapelet learning. The only one that develops specific methods for FDA is `scikit-fda` [@ramos-carrenoScikitfdaPythonPackage2024]. In particular, it implements diverse registration techniques as well as statistical data depths for functional data. However, most of the methods are for one-dimensional data and, in most cases, they only accept multivariate functional data defined on the same domain. 31 | 32 | `FDApy` supports the analysis of diverse types of functional data (densely or irregularly sampled, multivariate and multidimensional), represented over a grid of points or using a basis of functions. It implements dimension reduction techniques and smoothing functionalities. A large simulation toolbox, based on basis decomposition, is provided. By providing a flexible and robust toolset for functional data analysis, it aims to support researchers and practitioners in uncovering insights from complex functional datasets. 33 | 34 | `FDApy` was used in @golovkineClusteringMultivariateFunctional2022, @yoshidaDetectingDifferencesGait2022, @golovkineUseGramMatrix2023 and @nguyenLearningDomainspecificCameras2024 and is also presented in the author's doctoral dissertation.
35 | 36 | 37 | # Code Quality and Documentation 38 | 39 | 40 | `FDApy` is hosted on GitHub\footnote{\url{https://github.com/StevenGolovkine/FDApy}}. Examples and API documentation are available on the platform Read the Docs\footnote{\url{https://fdapy.readthedocs.io}}. We provide installation guides, algorithm introductions, and examples of using the package. The package is available on Linux, macOS and Windows for Python $3.9-3.11$. It can be installed with `pip install FDApy`. 41 | 42 | To ensure high code quality, all implementations adhere to the `PEP8` code style [@vanrossumPEP8StyleGuide2001], enforced by `flake8`, the code formatter `black` and the static analyzer `prospector`. The documentation is provided through docstrings using the `NumPy` conventions and build using `Sphinx`. The code is accompanied by unit tests covering $94\%$ of the lines that are automatically executed in a continuous integration workflow upon commits. 43 | 44 | # Acknowledgements 45 | 46 | Steven Golovkine wishes to thank Groupe Renault and the ANRT (French National Association for Research and Technology) for their financial support via the CIFRE convention No. 2017/1116. Steven Golovkine is partially supported by Science Foundation Ireland under Grant No. 19/FFP/7002 and co-funded under the European Regional Development Fund. 47 | 48 | # References -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "FDApy" 3 | description = "A Python package to analyze functional data." 
4 | readme = "README.rst" 5 | requires-python = ">=3.10, <4" 6 | license = {file = "LICENSE"} 7 | keywords = [ 8 | "functional data analysis" 9 | ] 10 | maintainers = [ 11 | {name = "Steven Golovkine", email = "steven_golovkine@icloud.com"}, 12 | ] 13 | classifiers = [ 14 | 'Programming Language :: Python :: 3.10', 15 | 'Topic :: Scientific/Engineering :: Mathematics' 16 | ] 17 | 18 | dynamic = ["version"] 19 | 20 | dependencies = [ 21 | 'lazy_loader', 22 | 'matplotlib', 23 | 'numpy<2.0.0', 24 | 'pandas>=2.0.0', 25 | 'scikit-learn>=1.2.0', 26 | 'scipy>=1.10.0' 27 | ] 28 | 29 | [project.optional-dependencies] 30 | docs = [ 31 | "matplotlib", 32 | "mpldatacursor", 33 | "numpydoc", 34 | "numpy<2.0.0", 35 | "pandas>=2.0.0", 36 | "pillow", 37 | "pydata-sphinx-theme==0.16.0", 38 | "scikit-learn", 39 | "scikit-fda", 40 | "scipy", 41 | "setuptools>=41.2", 42 | "sphinx>=3", 43 | "sphinxcontrib-bibtex", 44 | "sphinx-gallery", 45 | ] 46 | test = [ 47 | "pytest", 48 | "pytest-env", 49 | "pytest-subtests", 50 | "scipy", 51 | ] 52 | 53 | [project.urls] 54 | homepage = "https://github.com/StevenGolovkine/FDApy" 55 | documentation = "https://fdapy.readthedocs.io" 56 | repository = "https://github.com/StevenGolovkine/FDApy" 57 | 58 | [build-system] 59 | requires = ["setuptools>=61.0"] 60 | build-backend = "setuptools.build_meta" 61 | 62 | [tool.setuptools.packages.find] 63 | include = ["FDApy*"] 64 | 65 | [tool.setuptools.dynamic] 66 | version = {attr = "FDApy.__version__"} -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | # mypy 5 | [mypy] 6 | strict = True 7 | strict_equality = True 8 | implicit_reexport = True 9 | plugins = numpy.typing.mypy_plugin 10 | 11 | [mypy-pandas.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-scipy.*] 15 | ignore_missing_imports = True 16 | 17 | [mypy-sklearn.*] 18 | 
ignore_missing_imports = True 19 | 20 | [flake8] 21 | max-line-length = 88 22 | extend-ignore = E203 23 | 24 | [coverage:run] 25 | omit = 26 | # Omit reporting for __init__.py files 27 | */__init__.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/basis_2_1D.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/basis_2_1D.pickle -------------------------------------------------------------------------------- /tests/data/basis_2_2D.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/basis_2_2D.pickle -------------------------------------------------------------------------------- /tests/data/basis_multi_3_1D.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/basis_multi_3_1D.pickle -------------------------------------------------------------------------------- /tests/data/data_noisy_5_100_005.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_noisy_5_100_005.pickle -------------------------------------------------------------------------------- /tests/data/data_noisy_5_100_005_2D.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_noisy_5_100_005_2D.pickle -------------------------------------------------------------------------------- /tests/data/data_noisy_5_10_001.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_noisy_5_10_001.pickle -------------------------------------------------------------------------------- /tests/data/data_noisy_5_10_001_2D.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_noisy_5_10_001_2D.pickle -------------------------------------------------------------------------------- /tests/data/data_noisy_5_1_001_2D.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_noisy_5_1_001_2D.pickle -------------------------------------------------------------------------------- /tests/data/data_noisy_5_1_005.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_noisy_5_1_005.pickle -------------------------------------------------------------------------------- /tests/data/data_sparse_5_100_08.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_sparse_5_100_08.pickle -------------------------------------------------------------------------------- 
/tests/data/data_sparse_5_10_08.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_sparse_5_10_08.pickle -------------------------------------------------------------------------------- /tests/data/data_sparse_5_1_08.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGolovkine/FDApy/6626ebc6a70a6496f3058c4abb0bb97f5bdb20be/tests/data/data_sparse_5_1_08.pickle -------------------------------------------------------------------------------- /tests/test_basis_functional_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-coding:utf8 -* 3 | """Module that contains unit tests for the functions of the functional_data.py 4 | file. 5 | 6 | Written with the help of ChatGPT. 7 | 8 | """ 9 | import numpy as np 10 | import unittest 11 | 12 | from pathlib import Path 13 | 14 | from FDApy.representation.argvals import DenseArgvals 15 | from FDApy.representation.functional_data import BasisFunctionalData 16 | from FDApy.representation.basis import Basis 17 | 18 | THIS_DIR = Path(__file__) 19 | 20 | 21 | class TestBasisFunctionalData(unittest.TestCase): 22 | def setUp(self): 23 | argvals = DenseArgvals({"input_dim_0": np.linspace(0, 1, 11)}) 24 | basis = Basis("fourier", n_functions=2, argvals=argvals) 25 | coefs = np.array([[1, 0.5], [0.5, 1]]) 26 | self.func_data = BasisFunctionalData(basis, coefs) 27 | 28 | def test_n_obs(self): 29 | self.assertEqual(self.func_data.n_obs, 2) 30 | 31 | def test_n_dimension(self): 32 | self.assertEqual(self.func_data.n_dimension, 1) 33 | 34 | def test_n_points(self): 35 | expected_result = (11,) 36 | result = self.func_data.n_points 37 | np.testing.assert_equal(result, expected_result) 38 | 
-------------------------------------------------------------------------------- /tests/test_brownian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-coding:utf8 -* 3 | """Module that contains unit tests for the functions of the brownian.py file. 4 | 5 | Written with the help of ChatGPT. 6 | 7 | """ 8 | import numpy as np 9 | import unittest 10 | 11 | from FDApy.representation.functional_data import DenseFunctionalData 12 | from FDApy.simulation.brownian import ( 13 | _init_brownian, 14 | _standard_brownian, 15 | _geometric_brownian, 16 | _fractional_brownian, 17 | _simulate_brownian, 18 | Brownian, 19 | ) 20 | 21 | 22 | class TestInitBrownian(unittest.TestCase): 23 | def test_delta(self): 24 | # Test if delta is calculated correctly 25 | argvals = np.array([1, 2, 3, 4, 5]) 26 | delta, _ = _init_brownian(argvals) 27 | np.testing.assert_almost_equal(delta, 0.8) 28 | 29 | def test_argvals(self): 30 | # Test if the function returns the correct argvals 31 | argvals = np.array([1, 2, 3, 4, 5]) 32 | _, returned_argvals = _init_brownian(argvals) 33 | np.testing.assert_array_equal(returned_argvals, argvals) 34 | 35 | 36 | class TestStandardBrownian(unittest.TestCase): 37 | def test_shape(self): 38 | # Test if the output array has the correct shape 39 | argvals = np.arange(0, 1, 0.01) 40 | output = _standard_brownian(argvals) 41 | self.assertEqual(output.shape, argvals.shape) 42 | 43 | def test_starting_point(self): 44 | # Test if the starting point is correct 45 | argvals = np.arange(0, 1, 0.01) 46 | init_point = 1.0 47 | output = _standard_brownian(argvals, init_point=init_point) 48 | self.assertAlmostEqual(output[0], init_point) 49 | 50 | def test_reproducibility(self): 51 | # Test if the function is reproducible with the same rnorm 52 | argvals = np.arange(0, 1, 0.01) 53 | rnorm = np.random.default_rng(42).normal 54 | output1 = _standard_brownian(argvals, rnorm=rnorm) 55 | 56 | rnorm = 
np.random.default_rng(42).normal 57 | output2 = _standard_brownian(argvals, rnorm=rnorm) 58 | np.testing.assert_array_equal(output1, output2) 59 | 60 | 61 | class TestGeometricBrownian(unittest.TestCase): 62 | def setUp(self): 63 | self.argvals = np.arange(0, 1, 0.01) 64 | 65 | def test_init_point(self): 66 | with self.assertRaises(ValueError): 67 | _geometric_brownian(self.argvals, init_point=0.0) 68 | 69 | def test_output_shape(self): 70 | output = _geometric_brownian(self.argvals) 71 | self.assertEqual(output.shape, self.argvals.shape) 72 | 73 | def test_positive_values(self): 74 | output = _geometric_brownian(self.argvals) 75 | self.assertTrue(np.all(output > 0)) 76 | 77 | def test_reproducibility(self): 78 | output1 = _geometric_brownian( 79 | self.argvals, rnorm=np.random.default_rng(42).normal 80 | ) 81 | output2 = _geometric_brownian( 82 | self.argvals, rnorm=np.random.default_rng(42).normal 83 | ) 84 | np.testing.assert_allclose(output1, output2) 85 | 86 | 87 | class TestFractionalBrownian(unittest.TestCase): 88 | def setUp(self): 89 | self.argvals = np.arange(0, 1, 0.01) 90 | 91 | def test_output_shape(self): 92 | fbm = _fractional_brownian( 93 | self.argvals, hurst=0.7, rnorm=np.random.default_rng(42).normal 94 | ) 95 | self.assertEqual(fbm.shape, (100,)) 96 | 97 | def test_reproducibility(self): 98 | output1 = _fractional_brownian( 99 | self.argvals, hurst=0.7, rnorm=np.random.default_rng(42).normal 100 | ) 101 | output2 = _fractional_brownian( 102 | self.argvals, hurst=0.7, rnorm=np.random.default_rng(42).normal 103 | ) 104 | np.testing.assert_array_almost_equal(output1, output2) 105 | 106 | def test_negative_hurst(self): 107 | with self.assertRaises(ValueError): 108 | _fractional_brownian(self.argvals, hurst=-0.2) 109 | 110 | def test_zero_hurst(self): 111 | with self.assertRaises(ValueError): 112 | _fractional_brownian(self.argvals, hurst=0) 113 | 114 | 115 | class TestSimulateBrownian(unittest.TestCase): 116 | def setUp(self): 117 | self.argvals = 
np.arange(0, 1, 0.01) 118 | 119 | def test_error(self): 120 | with self.assertRaises(NotImplementedError): 121 | _simulate_brownian(name="error", argvals=self.argvals) 122 | 123 | def test_standard_brownian(self): 124 | """Test if _simulate_brownian returns a standard brownian motion.""" 125 | brownian_type = "standard" 126 | brownian = _simulate_brownian(brownian_type, self.argvals) 127 | self.assertEqual(brownian[0], 0) 128 | 129 | def test_geometric_brownian(self): 130 | """Test if _simulate_brownian returns a geometric brownian motion.""" 131 | brownian_type = "geometric" 132 | mu, sigma, init_point = 0.1, 0.5, 1.0 133 | brownian = _simulate_brownian( 134 | brownian_type, self.argvals, mu=mu, sigma=sigma, init_point=init_point 135 | ) 136 | self.assertTrue(np.all(brownian > 0)) 137 | 138 | def test_fractional_brownian(self): 139 | """Test if _simulate_brownian returns a fractional brownian motion.""" 140 | brownian_type = "fractional" 141 | hurst = 0.6 142 | brownian = _simulate_brownian(brownian_type, self.argvals, hurst=hurst) 143 | self.assertEqual(brownian.shape, (100,)) 144 | 145 | 146 | class TestBrownian(unittest.TestCase): 147 | def test_standard_brownian(self): 148 | # Test standard Brownian motion simulation 149 | brownian = Brownian(name="standard") 150 | brownian.new(n_obs=1) 151 | self.assertIsInstance(brownian.data, DenseFunctionalData) 152 | self.assertEqual(brownian.data.n_obs, 1) 153 | 154 | def test_geometric_brownian(self): 155 | # Test geometric Brownian motion simulation 156 | brownian = Brownian(name="geometric", random_state=42) 157 | brownian.new(n_obs=1, mu=0.05, sigma=0.1, init_point=100) 158 | self.assertIsInstance(brownian.data, DenseFunctionalData) 159 | self.assertEqual(brownian.data.n_obs, 1) 160 | 161 | def test_fractional_brownian(self): 162 | # Test fractional Brownian motion simulation 163 | brownian = Brownian(name="fractional", random_state=42) 164 | brownian.new(n_obs=1, hurst=0.4) 165 | self.assertIsInstance(brownian.data, 
DenseFunctionalData) 166 | self.assertEqual(brownian.data.n_obs, 1) 167 | -------------------------------------------------------------------------------- /tests/test_checker_functional_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-cooding:utf8 -* 3 | """Module that contains unit tests for the checkers of the FunctionalData 4 | classe. 5 | 6 | Written with the help of ChatGPT. 7 | 8 | """ 9 | import numpy as np 10 | import unittest 11 | 12 | from FDApy.representation.functional_data import ( 13 | FunctionalData, 14 | DenseFunctionalData, 15 | IrregularFunctionalData, 16 | ) 17 | from FDApy.representation.argvals import DenseArgvals, IrregularArgvals 18 | from FDApy.representation.values import DenseValues, IrregularValues 19 | 20 | 21 | class TestCheckSameType(unittest.TestCase): 22 | def setUp(self): 23 | # define DenseFunctionalData 24 | self.x = np.linspace(0, 1, num=10) 25 | self.y = np.random.randn(3, 10) 26 | self.argvals = {"input_dim_0": self.x} 27 | self.dense_fda = DenseFunctionalData( 28 | DenseArgvals(self.argvals), DenseValues(self.y) 29 | ) 30 | 31 | # define IrregularFunctionalData 32 | self.x = IrregularArgvals( 33 | { 34 | 0: DenseArgvals({"input_dim_0": np.array([1, 2, 3])}), 35 | 1: DenseArgvals({"input_dim_0": np.array([1, 2])}), 36 | } 37 | ) 38 | self.y = IrregularValues({0: np.array([4, 5, 6]), 1: np.array([2, 4])}) 39 | self.irreg_data = IrregularFunctionalData(self.x, self.y) 40 | 41 | def test_same_type(self): 42 | FunctionalData._check_same_type(self.dense_fda, self.dense_fda) 43 | FunctionalData._check_same_type(self.irreg_data, self.irreg_data) 44 | self.assertTrue(True) # if no error is raised, test passed 45 | 46 | def test_different_type(self): 47 | with self.assertRaises(TypeError): 48 | FunctionalData._check_same_type(self.dense_fda, self.irreg_data) 49 | -------------------------------------------------------------------------------- 
/tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-coding:utf8 -* 3 | """Module that contains unit tests for the functions of the datasets.py file. 4 | 5 | Written with the help of ChatGPT. 6 | 7 | """ 8 | import numpy as np 9 | import unittest 10 | 11 | from FDApy.representation.functional_data import DenseFunctionalData 12 | from FDApy.simulation.datasets import _zhang_chen, Datasets 13 | 14 | 15 | class ZhangChangTestCase(unittest.TestCase): 16 | def test_zhang_chang_shape(self): 17 | n_obs = 100 18 | argvals = np.linspace(0, 1, 10) 19 | results = _zhang_chen(n_obs, argvals) 20 | expected_shape = (n_obs, len(argvals)) 21 | self.assertEqual(results.shape, expected_shape) 22 | 23 | def test_zhang_chang_cos_sin(self): 24 | n_obs = 2 25 | argvals = np.linspace(0, 1, 10) 26 | rnorm = np.random.default_rng(42).normal 27 | 28 | results = _zhang_chen(n_obs, argvals, rnorm=rnorm) 29 | expected_results = np.array( 30 | [ 31 | [ 32 | 2.63139013, 33 | 5.02482289, 34 | 6.60973235, 35 | 5.8997611, 36 | 2.48634443, 37 | -1.16218953, 38 | -4.02114032, 39 | -3.39676508, 40 | -1.05722496, 41 | 2.36348728, 42 | ], 43 | [ 44 | 5.4050085, 45 | 6.01901495, 46 | 5.81901528, 47 | 3.17679982, 48 | 0.40184728, 49 | -1.65134235, 50 | -1.00260541, 51 | 0.10582, 52 | 2.66644022, 53 | 5.13092035, 54 | ], 55 | ] 56 | ) 57 | np.testing.assert_array_almost_equal(results, expected_results, decimal=3) 58 | 59 | 60 | class DatasetsTestCase(unittest.TestCase): 61 | def test_new_zhang_chen(self): 62 | basis_name = "zhang_chen" 63 | random_state = 42 64 | dataset = Datasets(basis_name, random_state) 65 | 66 | n_obs = 100 67 | argvals = np.linspace(0, 1, 10) 68 | 69 | dataset.new(n_obs, argvals=argvals) 70 | 71 | self.assertIsInstance(dataset.data, DenseFunctionalData) 72 | np.testing.assert_almost_equal(dataset.data.argvals["input_dim_0"], argvals) 73 | np.testing.assert_equal(dataset.data.values.shape, (n_obs, 
len(argvals))) 74 | 75 | def test_new_not_implemented(self): 76 | basis_name = "unknown_basis" 77 | random_state = 42 78 | dataset = Datasets(basis_name, random_state) 79 | 80 | n_obs = 100 81 | argvals = np.linspace(0, 1, 10) 82 | 83 | with self.assertRaises(NotImplementedError): 84 | dataset.new(n_obs, argvals=argvals) 85 | -------------------------------------------------------------------------------- /tests/test_functional_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-coding:utf8 -* 3 | """Module that contains unit tests for FunctionalData. 4 | 5 | Written with the help of ChatGPT. 6 | 7 | """ 8 | import numpy as np 9 | import unittest 10 | 11 | from FDApy.representation.argvals import DenseArgvals 12 | from FDApy.representation.values import DenseValues 13 | from FDApy.representation.functional_data import DenseFunctionalData 14 | 15 | 16 | class TestConcatenate(unittest.TestCase): 17 | def setUp(self): 18 | self.argvals = DenseArgvals({"input_dim_0": np.array([1, 2, 3, 4, 5])}) 19 | self.values = DenseValues( 20 | np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]) 21 | ) 22 | self.func_data = DenseFunctionalData(self.argvals, self.values) 23 | -------------------------------------------------------------------------------- /tests/test_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-coding:utf8 -* 3 | """Module that contains unit tests for the loader functions. 4 | 5 | Written with the help of ChatGPT. 
6 | 7 | """ 8 | import pandas as pd 9 | import numpy as np 10 | import unittest 11 | 12 | from FDApy.representation.argvals import DenseArgvals, IrregularArgvals 13 | from FDApy.representation.values import IrregularValues 14 | from FDApy.representation.functional_data import ( 15 | DenseFunctionalData, 16 | IrregularFunctionalData, 17 | ) 18 | from FDApy.misc.loader import read_csv, _read_csv_dense, _read_csv_irregular 19 | 20 | 21 | class TestReadCsvDense(unittest.TestCase): 22 | def test_read_csv_dense(self): 23 | # create test dataframe 24 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}) 25 | argvals = np.array([1, 2]) 26 | 27 | # read csv file 28 | obj = _read_csv_dense(df, argvals) 29 | 30 | # check if object is of type DenseFunctionalData 31 | self.assertIsInstance(obj, DenseFunctionalData) 32 | 33 | # check if the argvals match 34 | np.testing.assert_array_equal(obj.argvals["input_dim_0"], argvals) 35 | 36 | # check if the values match 37 | np.testing.assert_array_equal(obj.values, df.values) 38 | 39 | 40 | class TestReadCsvIrregular(unittest.TestCase): 41 | def setUp(self): 42 | self.data = pd.DataFrame( 43 | { 44 | "col1": [1, 2, np.nan, 4], 45 | "col2": [5, np.nan, 7, 8], 46 | "col3": [9, 10, 11, 12], 47 | } 48 | ) 49 | self.argvals = np.array([0, 1, 2]) 50 | self.expected_argvals = IrregularArgvals( 51 | { 52 | 0: DenseArgvals({"input_dim_0": np.array([0, 1, 2])}), 53 | 1: DenseArgvals({"input_dim_0": np.array([0, 2])}), 54 | 2: DenseArgvals({"input_dim_0": np.array([1, 2])}), 55 | 3: DenseArgvals({"input_dim_0": np.array([0, 1, 2])}), 56 | } 57 | ) 58 | self.expected_values = IrregularValues( 59 | { 60 | 0: np.array([1, 5, 9]), 61 | 1: np.array([2, 10]), 62 | 2: np.array([7, 11]), 63 | 3: np.array([4, 8, 12]), 64 | } 65 | ) 66 | 67 | def test_read_csv_irregular(self): 68 | # read csv file 69 | obj = _read_csv_irregular(self.data, self.argvals) 70 | 71 | # check if object is of type IrregularFunctionalData 72 | self.assertIsInstance(obj, 
IrregularFunctionalData) 73 | 74 | # check if the argvals match 75 | np.testing.assert_equal(obj.argvals, self.expected_argvals) 76 | 77 | # check if the values match 78 | np.testing.assert_allclose(obj.values, self.expected_values) 79 | 80 | 81 | class TestReadCsv(unittest.TestCase): 82 | def setUp(self): 83 | # create a test csv file 84 | self.dense_data = {"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]} 85 | self.dense_df = pd.DataFrame(self.dense_data) 86 | self.dense_df.to_csv("dense_test.csv", index=False) 87 | 88 | self.irregular_data = {"x": [1, 2, np.nan, 4], "y": [2, np.nan, 6, 8]} 89 | self.irregular_df = pd.DataFrame(self.irregular_data) 90 | self.irregular_df.to_csv("irregular_test.csv", index=False) 91 | 92 | def test_read_csv_dense(self): 93 | dense_obj = read_csv("dense_test.csv") 94 | self.assertIsInstance(dense_obj, DenseFunctionalData) 95 | np.testing.assert_array_equal(dense_obj.argvals["input_dim_0"], [0, 1]) 96 | np.testing.assert_array_equal(dense_obj.values, self.dense_df.values) 97 | 98 | def test_read_csv_irregular(self): 99 | irregular_obj = read_csv("irregular_test.csv") 100 | self.assertIsInstance(irregular_obj, IrregularFunctionalData) 101 | np.testing.assert_array_equal( 102 | irregular_obj.argvals[0]["input_dim_0"], np.array([0, 1]) 103 | ) 104 | np.testing.assert_array_equal( 105 | irregular_obj.argvals[1]["input_dim_0"], np.array([0]) 106 | ) 107 | np.testing.assert_array_equal( 108 | irregular_obj.argvals[2]["input_dim_0"], np.array([1]) 109 | ) 110 | np.testing.assert_array_equal( 111 | irregular_obj.argvals[3]["input_dim_0"], np.array([0, 1]) 112 | ) 113 | 114 | np.testing.assert_array_equal(irregular_obj.values[0], np.array([1, 2])) 115 | np.testing.assert_array_equal(irregular_obj.values[1], np.array([2])) 116 | np.testing.assert_array_equal(irregular_obj.values[2], np.array([6])) 117 | np.testing.assert_array_equal(irregular_obj.values[3], np.array([4, 8])) 118 | 119 | def tearDown(self): 120 | # delete test csv files 121 | 
import os 122 | 123 | os.remove("dense_test.csv") 124 | os.remove("irregular_test.csv") 125 | -------------------------------------------------------------------------------- /tests/test_values.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*-coding:utf8 -* 3 | """Module that contains unit tests for the functions of the _values.py file. 4 | 5 | Written with the help of ChatGPT. 6 | 7 | """ 8 | import numpy as np 9 | import unittest 10 | 11 | from FDApy.representation.argvals import DenseArgvals, IrregularArgvals 12 | from FDApy.representation.values import DenseValues, IrregularValues 13 | 14 | 15 | class TestDenseValues(unittest.TestCase): 16 | def test_n_obs(self): 17 | array = np.array([[1, 2, 3], [4, 5, 6]]) 18 | values = DenseValues(array) 19 | self.assertEqual(values.n_obs, 2) 20 | 21 | def test_n_points(self): 22 | array = np.array([[1, 2, 3], [4, 5, 6]]) 23 | values = DenseValues(array) 24 | self.assertEqual(values.n_points, (3,)) 25 | 26 | def test_compatible_with(self): 27 | argvals1 = DenseArgvals() 28 | argvals1["key1"] = np.array([1, 2, 3]) 29 | argvals1["key2"] = np.array([4, 5, 6]) 30 | 31 | values = DenseValues(np.random.randn(10, 3, 3)) 32 | values.compatible_with(argvals1) 33 | 34 | values = DenseValues(np.random.randn(10, 4, 3)) 35 | with self.assertRaises(ValueError): 36 | values.compatible_with(argvals1) 37 | 38 | def test_concatenate(self): 39 | array = np.array([[1, 2, 3], [4, 5, 6]]) 40 | values = DenseValues(array) 41 | 42 | new_values = DenseValues.concatenate(values, values) 43 | expected_values = np.array([[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]) 44 | np.testing.assert_equal(new_values, DenseValues(expected_values)) 45 | 46 | 47 | class TestIrregularValues(unittest.TestCase): 48 | def test_n_obs(self): 49 | values_dict = {0: np.array([1, 2, 3]), 1: np.array([4, 5, 6])} 50 | values = IrregularValues(values_dict) 51 | self.assertEqual(values.n_obs, 2) 52 | 53 | def 
test_n_points(self): 54 | values_dict = {0: np.array([1, 2, 3]), 1: np.array([4, 5, 6])} 55 | values = IrregularValues(values_dict) 56 | self.assertEqual(values.n_points, {0: (3,), 1: (3,)}) 57 | 58 | def test_setitem(self): 59 | values = IrregularValues() 60 | values[0] = np.array([1, 2, 3]) 61 | values[1] = np.array([4, 5, 6]) 62 | self.assertEqual(len(values), 2) 63 | 64 | def test_setitem_invalid_key(self): 65 | values = IrregularValues() 66 | with self.assertRaises(TypeError): 67 | values["key"] = np.array([1, 2, 3]) 68 | 69 | def test_setitem_invalid_value(self): 70 | values = IrregularValues() 71 | with self.assertRaises(TypeError): 72 | values[0] = "value" 73 | 74 | def test_compatible_with(self): 75 | argvals_1 = DenseArgvals( 76 | {"input_dim_0": np.random.randn(10), "input_dim_1": np.random.randn(11)} 77 | ) 78 | argvals_2 = DenseArgvals( 79 | {"input_dim_0": np.random.randn(5), "input_dim_1": np.random.randn(7)} 80 | ) 81 | argvals_irr = IrregularArgvals({0: argvals_1, 1: argvals_2}) 82 | 83 | values = IrregularValues({0: np.random.randn(10, 11), 1: np.random.randn(5, 7)}) 84 | values.compatible_with(argvals_irr) 85 | 86 | values = IrregularValues({0: np.random.randn(10, 10), 1: np.random.randn(5, 7)}) 87 | with self.assertRaises(ValueError): 88 | values.compatible_with(argvals_irr) 89 | 90 | def test_concatenate(self): 91 | values_dict = {0: np.array([1, 2, 3]), 1: np.array([4, 5, 6])} 92 | values = IrregularValues(values_dict) 93 | 94 | values_dict = {0: np.array([1, 2])} 95 | values_2 = IrregularValues(values_dict) 96 | 97 | new_values = IrregularValues.concatenate(values, values_2) 98 | expected_values = IrregularValues( 99 | {0: np.array([1, 2, 3]), 1: np.array([4, 5, 6]), 2: np.array([1, 2])} 100 | ) 101 | np.testing.assert_allclose(new_values, expected_values) 102 | --------------------------------------------------------------------------------