├── .build.info ├── .flake8 ├── .gitattributes ├── .github └── workflows │ ├── build-test-codecov.yml │ ├── github-actions-demo.yml │ ├── test-release-candidate.yaml │ └── upload-to-testpypi.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .secrets.baseline ├── Dockerfile ├── LICENSE ├── README.md ├── examples ├── fitting_Model_1st_6peaks+Si.png └── raw_data.png ├── pyproject.toml ├── src └── raman_fitting │ ├── __init__.py │ ├── config │ ├── __init__.py │ ├── base_settings.py │ ├── default_models │ │ ├── __init__.py │ │ ├── first_order.toml │ │ ├── normalization.toml │ │ ├── second_order.toml │ │ └── spectrum_regions.toml │ ├── filepath_helper.py │ ├── logging_config.py │ └── path_settings.py │ ├── delegating │ ├── __init__.py │ ├── main_delegator.py │ ├── models.py │ ├── pre_processing.py │ ├── run_fit_multi.py │ └── run_fit_spectrum.py │ ├── example_fixtures │ ├── Si_spectrum01.txt │ ├── __init__.py │ ├── testDW38C_pos1.txt │ ├── testDW38C_pos2.txt │ ├── testDW38C_pos3.txt │ └── testDW38C_pos4.txt │ ├── exports │ ├── __init__.py │ ├── exporter.py │ ├── file_table.py │ ├── plot_formatting.py │ ├── plotting_fit_results.py │ └── plotting_raw_data.py │ ├── imports │ ├── __init__.py │ ├── collector.py │ ├── files │ │ ├── file_finder.py │ │ ├── file_indexer.py │ │ ├── index_funcs.py │ │ ├── index_helpers.py │ │ ├── metadata.py │ │ ├── utils.py │ │ └── validators.py │ ├── models.py │ ├── samples │ │ ├── models.py │ │ └── sample_id_helpers.py │ ├── spectrum │ │ ├── __init__.py │ │ ├── datafile_parsers.py │ │ ├── spectra_collection.py │ │ └── validators.py │ └── spectrumdata_parser.py │ ├── interfaces │ ├── __init__.py │ ├── argparse_cli.py │ ├── typer_cli.py │ └── utils.py │ ├── models │ ├── __init__.py │ ├── deconvolution │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── base_peak.py │ │ ├── init_models.py │ │ ├── lmfit_parameter.py │ │ └── spectrum_regions.py │ ├── fit_models.py │ ├── post_deconvolution │ │ ├── __init__.py │ │ ├── calculate_params.py │ │ └── parameter_ratio_funcs.py │ ├── spectrum.py │ └── splitter.py │ ├── processing │ ├── __init__.py │ ├── baseline_subtraction.py │ ├── despike.py │ ├── filter.py │ ├── normalization.py │ └── post_processing.py │ ├── types.py │ └── utils │ ├── __init__.py │ ├── decorators.py │ ├── file_reader.py │ └── string_operations.py ├── tests ├── __init__.py ├── conftest.py ├── deconvolution_models │ ├── __init__.py │ ├── test_base_model.py │ ├── test_base_peaks.py │ ├── test_fit_models.py │ └── test_peak_validation.py ├── delegating │ ├── __init__.py │ └── test_main_delegator.py ├── empty.toml ├── exporting │ ├── __init__.py │ └── test_plotting.py ├── indexing │ ├── __init__.py │ ├── test_filename_parser.py │ └── test_indexer.py ├── models │ ├── __init__.py │ ├── test_base_peak.py │ ├── test_calculate_params.py │ └── test_fit_models.py ├── processing │ ├── __init__.py │ ├── test_cleaner.py │ └── test_spectrum_constructor.py └── test_fixtures │ ├── empty-lines_1.txt │ └── wrong-values-in-lines_pos1.txt └── todos.md /.build.info: -------------------------------------------------------------------------------- 1 | # Build release overview 2 | # Reference: 3 | # https://github.com/pauliacomi/pyGAPS/blob/c19bf45a896ff787acf8a29f77652a90236dd6c5/.build.info 4 | # This file contains details about how to 5 | # perform a release of this package 6 | 7 | # Create new branch for release (we use git flow) 8 | # Respect semantic versioning for the releases 9 | git flow release start x.y.z 10 | 11 | # Ensure all tests are passing 12 | # or use CI provider to run 
them automatically 13 | pytest --cov --cov-report xml:coverage.xml 14 | 15 | # Run bumpversion to change any version strings 16 | # scattered throughout the source code 17 | bumpversion major/minor/patch 18 | # !!! Check if pre-commit hooks are enabled 19 | # bump2version may fail the commit if pre-commit hooks fail... 20 | # bump2version patch --no-commit 21 | 22 | # Finish branch 23 | git flow release finish x.y.z 24 | 25 | # Push, including tags 26 | git push --tags 27 | 28 | # Check CI if project has been successfully pushed to pypi 29 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # Rule definitions: http://flake8.pycqa.org/en/latest/user/error-codes.html 3 | # D203: 1 blank line required before class docstring 4 | # W503: line break before binary operator 5 | exclude = 6 | venv*, 7 | .venv, 8 | __pycache__, 9 | node_modules, 10 | bower_components, 11 | migrations, 12 | .tox, 13 | .eggs, 14 | build, 15 | dist 16 | 17 | extend-exclude= 18 | *_Conflict.py 19 | *_old.py 20 | *_dev_*.py 21 | 22 | 23 | ignore = D203,W503 24 | max-complexity = 9 25 | max-line-length = 140 26 | extend-ignore = 27 | E115, E121, E123, E126, E128, 28 | E202, E203, E125, E128, 29 | E222, E226, E231, E241, E251, E262 30 | E303, E306, 31 | F523, E261, E265, E266, 32 | E301, E302, E305, 33 | F401, E402, F402, F403, 34 | E501, F502, F524, F541, 35 | F601, W605, 36 | E712, E713, E722, E741 37 | F811, F841, 38 | C901 39 | 40 | per-file-ignores = 41 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .git_archival.txt export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/build-test-codecov.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Lint Build Test Codecov 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main, pre-release ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: true 18 | matrix: 19 | os: [ ubuntu-latest, macos-latest, windows-latest ] 20 | python-version: [3.11] 21 | 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | 31 | - name: Install dependencies 32 | run: | 33 | python3 -m pip install -U pip 34 | python3 -m pip install -U build 35 | python3 -m pip install flake8 36 | 37 | - name: Lint with flake8 38 | run: | 39 | # stop the build if there are Python syntax errors or undefined names 40 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 41 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 42 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=140 --statistics 43 | 44 | - name: Build wheels 45 | run: | 46 | python3 -m build 47 | # install the package in editable mode for the coverage report 48 | python3 -m pip install -e .["pytest"] 49 | 50 | - name: Generate coverage report 51 | run: | 52 | pytest --cov --cov-report=xml --cov-report=term-missing 53 | - name: Upload Coverage to Codecov 54 | uses: codecov/codecov-action@v1 55 | 56 | - name: raman_fitting run examples 57 | 58 | 59 | run: | 60 | raman_fitting run examples 61 | -------------------------------------------------------------------------------- /.github/workflows/github-actions-demo.yml: -------------------------------------------------------------------------------- 1 | name: GitHub Actions Demo 2 | on: [push] 3 | jobs: 4 | Explore-GitHub-Actions: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." 8 | - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" 9 | - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." 10 | - name: Check out repository code 11 | uses: actions/checkout@v3 12 | - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner by $GITHUB_ACTOR or ${{ github.actor }}." 13 | - run: echo "🖥️ The workflow is now ready to test your code on the runner." 14 | - name: List files in the repository 15 | run: | 16 | ls ${{ github.workspace }} 17 | - run: echo "🍏 This job's status is ${{ job.status }}." 18 | -------------------------------------------------------------------------------- /.github/workflows/test-release-candidate.yaml: -------------------------------------------------------------------------------- 1 | # Tests for releases and release candidates 2 | # 3 | # Runs on every tag creation, and all pushes and PRs to release branches 4 | # named "v1.2.x", etc. 5 | # 6 | # This workflow is more extensive than the regular test workflow. 7 | # - Tests are executed on more Python versions 8 | # - Tests are run on more operating systems 9 | # - N.B. There is no pip cache here to ensure runs are always against the 10 | # very latest versions of dependencies, even if this workflow ran recently. 11 | # 12 | # In addition, the package is built as a wheel on each OS/Python job, and these 13 | # are stored as artifacts to use for your distribution process. There is an 14 | # extra job (disabled by default) which can be enabled to push to Test PyPI. 15 | 16 | # Reference: 17 | # https://github.com/scottclowe/python-template-repo 18 | 19 | name: release candidate tests 20 | 21 | on: 22 | workflow_dispatch: 23 | 24 | push: 25 | branches: 26 | # Release branches. 27 | # Examples: "v1", "v3.0", "v1.2.x", "1.5.0", "1.2rc0" 28 | # Expected usage is (for example) a branch named "v1.2.x" which contains 29 | # the latest release in the 1.2 series. 
30 | - 'v[0-9]+' 31 | - 'v?[0-9]+.[0-9x]+' 32 | - 'v?[0-9]+.[0-9]+.[0-9x]+' 33 | - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+' 34 | - 'v?[0-9]+.[0-9x]+rc[0-9]*' 35 | tags: 36 | # Run whenever any tag is created 37 | - '**' 38 | pull_request: 39 | branches: 40 | # Release branches 41 | - 'v[0-9]+' 42 | - 'v?[0-9]+.[0-9x]+' 43 | - 'v?[0-9]+.[0-9]+.[0-9x]+' 44 | - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+' 45 | - 'v?[0-9]+.[0-9x]+rc[0-9]*' 46 | release: 47 | # Run on a new release 48 | types: [created, edited, published] 49 | 50 | jobs: 51 | test-build: 52 | runs-on: ${{ matrix.os }} 53 | strategy: 54 | matrix: 55 | os: [ubuntu-latest, windows-latest, macos-latest] 56 | python-version: ["3.11"] 57 | env: 58 | OS: ${{ matrix.os }} 59 | PYTHON: ${{ matrix.python-version }} 60 | 61 | steps: 62 | - uses: actions/checkout@v3 63 | 64 | - name: Set up Python ${{ matrix.python-version }} 65 | uses: actions/setup-python@v4 66 | with: 67 | python-version: ${{ matrix.python-version }} 68 | 69 | - name: Install dependencies 70 | run: | 71 | python -m pip install --upgrade pip 72 | python -m pip install flake8 73 | python -m pip install -U build 74 | python -m pip install --editable .["pytest"] 75 | 76 | - name: Sanity check with flake8 77 | run: | 78 | # stop the build if there are Python syntax errors or undefined names 79 | python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 80 | # exit-zero treats all errors as warnings 81 | python -m flake8 . --count --exit-zero --statistics 82 | 83 | - name: Debug environment 84 | run: python -m pip freeze 85 | 86 | - name: Test with pytest 87 | run: | 88 | pytest --cov --cov-report=xml --cov-report=term-missing 89 | # python -m pytest --cov=raman_fitting --cov-report=term-missing --cov-report=xml --cov-config .coveragerc --junitxml=testresults.xml 90 | 91 | - name: Upload coverage to Codecov 92 | if: false 93 | uses: codecov/codecov-action@v1 94 | with: 95 | flags: unittests 96 | env_vars: OS,PYTHON 97 | name: Python ${{ matrix.python-version }} on ${{ runner.os }} 98 | 99 | - name: Build wheels 100 | run: | 101 | python3 -m pip install -U build 102 | python3 -m build 103 | 104 | - name: Store wheel artifacts 105 | uses: actions/upload-artifact@v2 106 | with: 107 | name: wheel-${{ runner.os }} 108 | path: dist/* 109 | 110 | 111 | publish: 112 | # Disabled by default 113 | if: startsWith(github.ref, 'refs/tags/') 114 | needs: test-build 115 | 116 | runs-on: ubuntu-latest 117 | steps: 118 | - uses: actions/checkout@v3 119 | 120 | - name: Download wheel artifacts 121 | uses: actions/download-artifact@v2 122 | with: 123 | name: wheel-${{ runner.os }} 124 | path: dist/ 125 | 126 | - name: Store wheel artifacts 127 | uses: actions/upload-artifact@v2 128 | with: 129 | name: wheel-${{ runner.os }} 130 | path: dist/* 131 | 132 | - name: Publish package to Test PyPI 133 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 134 | uses: pypa/gh-action-pypi-publish@release/v1 135 | with: 136 | user: __token__ 137 | password: ${{ secrets.TWINE_TEST_TOKEN }} 138 | repository_url: https://test.pypi.org/legacy/ 139 | 140 | - name: Publish package to PyPI 📦 141 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 142 | uses: pypa/gh-action-pypi-publish@release/v1 143 | with: 144 | user: __token__ 145 | password: ${{ secrets.PYPI_API_TOKEN }} 146 | -------------------------------------------------------------------------------- /.github/workflows/upload-to-testpypi.yml: 
-------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Publish to TestPyPI and PyPI 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on push to the master branch 8 | #push: 9 | # branches: [ master ] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 15 | jobs: 16 | # This workflow contains a single job called "build" 17 | build: 18 | # The type of runner that the job will run on 19 | name: Build Python 🐍 distributions to 📦 20 | #if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 21 | runs-on: ubuntu-latest 22 | 23 | # Steps represent a sequence of tasks that will be executed as part of the job 24 | steps: 25 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 26 | - uses: actions/checkout@v3 27 | # with: 28 | # fetch-depth: 0 29 | - name: Fetch all history for all tags and branches 30 | run: git fetch --prune --unshallow 31 | 32 | - name: Set up python 3.11 33 | uses: actions/setup-python@v4 34 | with: 35 | python-version: 3.11 36 | 37 | # Installs and upgrades pip, installs other dependencies and installs the package from pyproject.toml 38 | - name: Installs and upgrades pip and installs other dependencies 39 | run: | 40 | # Upgrade pip 41 | python3 -m pip install --upgrade pip 42 | # Install build deps 43 | python3 -m pip install -U build 44 | # If requirements.txt exists, install from it 45 | python3 -m pip install -r requirements.txt 46 | 47 | - name: Builds the package 48 | run: | 49 | # Install package with build 50 | python3 -m build 51 | 52 | - name: Store wheel artifacts 53 | uses: actions/upload-artifact@v2 54 | with: 55 | name: wheel-${{ runner.os }}-${{ runner.python-version }} 56 | path: dist/* 57 | 58 | publish: 59 | 60 | name: Publish 📦 to PyPI and TestPyPI 61 | #if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 62 | needs: build 63 | runs-on: ubuntu-latest 64 | # Steps represent a sequence of tasks that will be executed as part of the job 65 | steps: 66 | - uses: actions/checkout@v3 67 | 68 | - name: Download wheel artifacts 69 | uses: actions/download-artifact@v2 70 | with: 71 | name: wheel-${{ runner.os }}-${{ runner.python-version }} 72 | path: dist/ 73 | 74 | - name: Store aggregated wheel artifacts 75 | uses: actions/upload-artifact@v2 76 | with: 77 | name: wheels 78 | path: dist/* 79 | 80 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 81 | - name: Check to TestPyPI 82 | run: | 83 | python3 -m pip install -U twine 84 | # Check twine in advance even though gh-action-pypi also does that 85 | twine check dist/* 86 | # Upload to TestPyPI 87 | - name: Publish package to TestPyPI 📦 88 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 89 | uses: pypa/gh-action-pypi-publish@release/v1 90 | with: 91 | user: __token__ 92 | password: ${{ secrets.TWINE_TEST_TOKEN }} 93 | repository_url: https://test.pypi.org/legacy/ 94 | 95 | - name: Publish package to PyPI 📦 96 | if: startsWith(github.ref, 'refs/tags/v') 97 | uses: pypa/gh-action-pypi-publish@v1.5.1 98 | with: 99 | user: __token__ 100 | password: ${{ secrets.PYPI_API_TOKEN }} 101 | verbose: true 102 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | testresults.xml 47 | 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | env*/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # VS code project settings 101 | .vscode 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | # pycharm 113 | .todo/ 114 | 115 | # datafiles and results 116 | **/results/* 117 | tests/test_results/** 118 | 119 | /*.csv 120 | /*.zip 121 | #/*.txt 122 | /*.xlsx 123 | 124 | # local configuration settings 125 | local_config.py 126 | 127 | # all logs 128 | logs/ 129 | 130 | # trained models (will be created in CI) 131 | /*.pkl 132 | 133 | # extra tox files 134 | tox.ini.bak 135 | tox-generated.ini 136 | 137 | # Generated by setuptools-scm 138 | */*/_version.py 139 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | # Temporary disabling hooks: SKIP=flake8 git commit -m "foo" 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.5.0 7 | hooks: 8 | - id: check-added-large-files 9 | name: Check for files larger than 5 MB 10 | args: [ "--maxkb=5120" ] 11 | - id: end-of-file-fixer 12 | name: Check for a blank line at the end of scripts (auto-fixes) 13 | exclude: '\.Rd' 14 | - id: trailing-whitespace 15 | name: Check for trailing whitespaces (auto-fixes) 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | # Ruff version. 18 | rev: v0.3.2 19 | hooks: 20 | # Run the linter. 21 | - id: ruff 22 | args: [ --fix ] 23 | # Run the formatter. 
24 | - id: ruff-format 25 | - repo: https://github.com/gitleaks/gitleaks 26 | rev: v8.18.2 27 | hooks: 28 | - id: gitleaks 29 | -------------------------------------------------------------------------------- /.secrets.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.3", 3 | "plugins_used": [ 4 | { 5 | "name": "ArtifactoryDetector" 6 | }, 7 | { 8 | "name": "AWSKeyDetector" 9 | }, 10 | { 11 | "name": "AzureStorageKeyDetector" 12 | }, 13 | { 14 | "name": "Base64HighEntropyString", 15 | "limit": 4.5 16 | }, 17 | { 18 | "name": "BasicAuthDetector" 19 | }, 20 | { 21 | "name": "CloudantDetector" 22 | }, 23 | { 24 | "name": "HexHighEntropyString", 25 | "limit": 3.0 26 | }, 27 | { 28 | "name": "IbmCloudIamDetector" 29 | }, 30 | { 31 | "name": "IbmCosHmacDetector" 32 | }, 33 | { 34 | "name": "JwtTokenDetector" 35 | }, 36 | { 37 | "name": "KeywordDetector", 38 | "keyword_exclude": "" 39 | }, 40 | { 41 | "name": "MailchimpDetector" 42 | }, 43 | { 44 | "name": "NpmDetector" 45 | }, 46 | { 47 | "name": "PrivateKeyDetector" 48 | }, 49 | { 50 | "name": "SlackDetector" 51 | }, 52 | { 53 | "name": "SoftlayerDetector" 54 | }, 55 | { 56 | "name": "SquareOAuthDetector" 57 | }, 58 | { 59 | "name": "StripeDetector" 60 | }, 61 | { 62 | "name": "TwilioKeyDetector" 63 | } 64 | ], 65 | "filters_used": [ 66 | { 67 | "path": "detect_secrets.filters.allowlist.is_line_allowlisted" 68 | }, 69 | { 70 | "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", 71 | "min_level": 2 72 | }, 73 | { 74 | "path": "detect_secrets.filters.heuristic.is_indirect_reference" 75 | }, 76 | { 77 | "path": "detect_secrets.filters.heuristic.is_likely_id_string" 78 | }, 79 | { 80 | "path": "detect_secrets.filters.heuristic.is_potential_uuid" 81 | }, 82 | { 83 | "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" 84 | }, 85 | { 86 | "path": "detect_secrets.filters.heuristic.is_sequential_string" 87 | }, 88 | { 89 | "path": "detect_secrets.filters.heuristic.is_templated_secret" 90 | } 91 | ], 92 | "results": {}, 93 | "generated_at": "2021-06-14T10:43:14Z" 94 | } 95 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # set base image (host OS) 2 | FROM python:3.11 3 | 4 | RUN addgroup -S nonroot \ 5 | && adduser -S nonroot -G nonroot 6 | 7 | USER nonroot 8 | 9 | # set the working directory in the container 10 | WORKDIR /code 11 | 12 | # copy the dependencies file to the working directory 13 | COPY ./raman-fitting ./raman-fitting 14 | 15 | # copy setup.cfg to work dir 16 | # COPY setup.cfg . 17 | # COPY setup.py . 18 | # install package test, maybe not possible because only src 19 | # RUN pip install -e ./ 20 | 21 | 22 | # install dependencies 23 | RUN pip install -r requirements.txt 24 | 25 | RUN pip install --upgrade build 26 | RUN build ./ 27 | RUN pip install -e ./ 28 | 29 | # copy the content of the local src directory to the working directory 30 | #COPY src/ . 
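# Example usage of this image (a sketch only; the "raman-fitting" tag below is an arbitrary choice, not defined by this Dockerfile):
#   docker build -t raman-fitting .
#   docker run --rm raman-fitting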
31 | 32 | # command to run on container start 33 | CMD [ "raman_fitting run examples" ] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 David Wallace 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 2 | [![CI GH actions](https://github.com/MyPyDavid/raman-fitting/actions/workflows/build-test-codecov.yml/badge.svg)](https://github.com/MyPyDavid/raman-fitting/actions/workflows/build-test-codecov.yml) 3 | [![codecov](https://codecov.io/gh/MyPyDavid/raman-fitting/branch/main/graph/badge.svg?token=II9JZAODJY)](https://codecov.io/gh/MyPyDavid/raman-fitting) 4 | [![Test & Upload to TestPyPI](https://github.com/MyPyDavid/raman-fitting/actions/workflows/upload-to-testpypi.yml/badge.svg)](https://github.com/MyPyDavid/raman-fitting/actions/workflows/upload-to-testpypi.yml) 5 | 6 |

7 | [image: raman_cover_img] 8 |

9 | 10 | 11 | # raman-fitting 12 | A Python framework that performs a deconvolution on typical parts of interest on the spectrum of carbonaceous materials. 13 | The deconvolutions are done with models which are composed of collections of lineshapes or peaks that are typically assigned to these spectra in scientific literature. 14 | 15 | In batch processing mode this package will index the raman data files in a chosen folder. 16 | First, it will try to extract a sample ID and position number from the filenames and create an index of the files in a dataframe. Over this index a preprocessing, fitting and exporting loop will start. 17 | There are several models, each with a different combination of typical peaks, used for fitting. Each individual typical peak is defined as a class in the deconvolution/default_models folder with some added literature reference in the docstring. Here, the individual peak parameter settings can also be easily adjusted for initial values, limits, shape (eg. Lorentzian, Gaussian and Voigt) or be fixed at certain initial values. 18 | Export is done with plots and excel files for the spectral data and fitting parameters for further analysis. 19 | 20 | 21 | ### Example plots 22 | 23 | https://github.com/MyPyDavid/raman-fitting/wiki 24 | 25 | 26 | ### Set up virtual environment and install the package 27 | 28 | A release is now available on PyPI, installation can be done with these commands in a terminal. 29 | ``` bash 30 | # Setting up and activating a virtual environment 31 | python -m venv env # python 3.11 is recommended 32 | source env/bin/activate 33 | 34 | # Installation from PyPI 35 | python -m pip install raman_fitting 36 | ``` 37 | 38 | #### From source installation 39 | 40 | The following shows how to install the package from this source repository. 41 | Download or clone this repository in a certain folder. 42 | ``` bash 43 | git clone https://github.com/MyPyDavid/raman-fitting.git 44 | 45 | # set up and activate venv ... 46 | 47 | # regular install 48 | python -m pip install raman-fitting/ 49 | 50 | # editable/develop mode 51 | python -m pip install -e raman-fitting/ 52 | ``` 53 | 54 | ### Usage 55 | 56 | #### Post installation test run 57 | 58 | In order to test the package after installation, please try the following command in a terminal CLI. 59 | ``` bash 60 | raman_fitting run examples 61 | ``` 62 | or these commands in the Python interpreter or in a Jupyter Notebook. 63 | ``` python 64 | import raman_fitting 65 | raman_fitting.make_examples() 66 | ``` 67 | This test run should yield the resulting plots and files in the following folder. Where home means the local user home directory depending on the OS. 68 | ``` bash 69 | # Linux 70 | home/.raman_fitting/example_results 71 | 72 | # For Other OSs, log messages will show: 73 | # Results saved in ... 74 | 75 | ``` 76 | 77 | #### Fitting your own datafiles 78 | Place your data files in the default location or change this default setting in the config. 79 | ``` bash 80 | home/.raman_fitting/datafiles 81 | ``` 82 | The following command will attempt the indexing, preprocessing, fitting and plotting on all the files found in this folder. 83 | ``` bash 84 | # default run mode is "normal" means over all the files found in the index 85 | raman_fitting 86 | 87 | # If you add a lot of files, try to check if the index is properly constructed 88 | # before fitting them. 
89 | raman_fitting make index 90 | 91 | # Location of index 92 | home/.raman_fitting/datafiles/results/raman_fitting_index.csv 93 | ``` 94 | 95 | #### Datafiles 96 | 97 | The raman data files should be .txt files with two columns of data values. 98 | The first column should contain the Raman shift values and the second one the measured intensity. 99 | Filenames will be parsed into a sampleID and position, in order to take the mean of the measured intensity 100 | of several positions on the same sample. 101 | 102 | An example of filename formatting and parsing result: 103 | ``` python 104 | samplename1_pos1.txt => sampleID = 'samplename1', position = 1 105 | sample2-100_3.txt => sampleID = 'sample2-100', position = 3 106 | ``` 107 | ### Version 108 | 109 | The current version is v0.8.0 110 | 111 | ### Dependencies 112 | 113 | - python >= 3.11 114 | - lmfit >= 1.2.0 115 | - pandas >= 2.0.0 116 | - scipy >= 1.10.1 117 | - matplotlib >= 3.7.2 118 | - numpy >= 1.24.2 119 | -------------------------------------------------------------------------------- /examples/fitting_Model_1st_6peaks+Si.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/examples/fitting_Model_1st_6peaks+Si.png -------------------------------------------------------------------------------- /examples/raw_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/examples/raw_data.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "raman_fitting" 7 | license = {file = "LICENSE"} 8 | authors = [ 9 | {name = "David Wallace", email = "mypydavid@proton.me"}, 10 | ] 11 | description = "Python framework for the batch processing and deconvolution of raman spectra." 
12 | readme = {file = "README.md", content-type = "text/markdown"} 13 | keywords = ["spectroscopy", "Raman", "fitting", "deconvolution", "batch processing", "carbonaceous materials"] 14 | classifiers = [ 15 | "License :: OSI Approved :: MIT License", 16 | "Natural Language :: English", 17 | "Programming Language :: Python", 18 | "Programming Language :: Python :: 3 :: Only", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: Implementation :: CPython", 21 | "Programming Language :: Python :: Implementation :: PyPy", 22 | "Intended Audience :: Science/Research", 23 | "Topic :: Scientific/Engineering :: Physics", 24 | "Topic :: Scientific/Engineering :: Chemistry", 25 | ] 26 | dynamic = ["version"] 27 | dependencies = [ 28 | "pandas~=2.1.2", 29 | "scipy~=1.11.3", 30 | "lmfit~=1.2.2", 31 | "matplotlib~=3.8.0", 32 | "numpy~=1.26.1", 33 | "tablib~=3.5.0", 34 | "pydantic>=2.5", 35 | "pydantic-settings>=2.1", 36 | "pydantic_numpy>=4.1", 37 | "loguru>=0.7", 38 | "typer[all]", 39 | "mpire[dill]~=2.10.0", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | pytest = [ 44 | "pytest", 45 | "pytest-cov", 46 | "pytest-flake8", 47 | "mock", 48 | "wheel" 49 | ] 50 | dev = [ 51 | "isort", 52 | "pylint", 53 | "flake8", 54 | "autopep8", 55 | "pydocstyle", 56 | "black", 57 | "bump2version", 58 | "raman_fitting[pytest]" 59 | ] 60 | 61 | [project.urls] 62 | homepage = "https://pypi.org/project/raman-fitting/" 63 | repository = "https://github.com/MyPyDavid/raman-fitting.git" 64 | # documentation = "https://raman-fitting.readthedocs.io/en/latest/" 65 | 66 | [project.scripts] 67 | raman_fitting = "raman_fitting.interfaces.typer_cli:app" 68 | 69 | 70 | [tool.hatch.version] 71 | source = "vcs" 72 | 73 | [tool.hatch.build.hooks.vcs] 74 | version-file = "src/raman_fitting/_version.py" 75 | 76 | [tool.hatch.build.targets.sdist] 77 | exclude = ["/profiling"] 78 | 79 | [tool.hatch.build.targets.wheel] 80 | only-include = ["src"] 81 | sources = ["src"] 82 | 83 | # testing 84 | [tool.pytest.ini_options] 85 | minversion = "7.0" 86 | pythonpath = "src" 87 | addopts = [ 88 | "--import-mode=importlib", 89 | "-ra -q", 90 | "--cov", 91 | "--cov-report term-missing html xml annotate", 92 | ] 93 | testpaths = [ 94 | "tests", 95 | ] 96 | 97 | [tool.coverage.run] 98 | source = ["src"] 99 | omit = [ 100 | "*/example_fixtures/*", 101 | ] 102 | 103 | [tool.coverage.report] 104 | # Regexes for lines to exclude from consideration 105 | exclude_also = [ 106 | # Don't complain about missing debug-only code: 107 | "def __repr__", 108 | "if self\\.debug", 109 | 110 | # Don't complain if tests don't hit defensive assertion code: 111 | "raise AssertionError", 112 | "raise NotImplementedError", 113 | 114 | # Don't complain if non-runnable code isn't run: 115 | "if 0:", 116 | "if __name__ == .__main__.:", 117 | 118 | # Don't complain about abstract methods, they aren't run: 119 | "@(abc\\.)?abstractmethod", 120 | ] 121 | 122 | ignore_errors = true 123 | 124 | [tool.coverage.html] 125 | directory = "coverage_html_report" 126 | 127 | 128 | [tool.bumpversion] 129 | current_version = "0.8.0" 130 | commit = true 131 | commit_args = "--no-verify" 132 | message = "Bump version: {current_version} → {new_version}" 133 | tag = true 134 | allow_dirty = true 135 | tag_name = "{new_version}" 136 | tag_message = "Bump version: {current_version} → {new_version}" 137 | parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(\\.dev(?P\\d+))?" 
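# The named groups captured by "parse" (major, minor, patch and the optional dev counter)
# are the values substituted into the "serialize" templates below.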
138 | serialize =[ 139 | "{major}.{minor}.{patch}.dev{dev}", 140 | "{major}.{minor}.{patch}" 141 | ] 142 | 143 | [[tool.bumpversion.files]] 144 | filename= "README.md" 145 | search = "The current version is v{current_version}" 146 | replace = "The current version is v{new_version}" 147 | 148 | [[tool.bumpversion.files]] 149 | filename= "pyproject.toml" 150 | search = "current_version = '{current_version}'" 151 | replace = "current_version = '{new_version}'" 152 | -------------------------------------------------------------------------------- /src/raman_fitting/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "David Wallace" 2 | __docformat__ = "restructuredtext" 3 | __status__ = "Development" 4 | __future_package_name__ = "pyramdeconv" 5 | __current_package_name__ = "raman_fitting" 6 | __package_name__ = __current_package_name__ 7 | 8 | import importlib.util 9 | 10 | try: 11 | from ._version import __version__ 12 | except ImportError: 13 | # -- Source mode -- 14 | try: 15 | # use setuptools_scm to get the current version from src using git 16 | from setuptools_scm import get_version as _gv 17 | from os import path as _path 18 | 19 | __version__ = _gv(_path.join(_path.dirname(__file__), _path.pardir)) 20 | except ModuleNotFoundError: 21 | __version__ = "importerr_modulenotfound_version" 22 | except Exception: 23 | __version__ = "importerr_exception_version" 24 | except Exception: 25 | __version__ = "catch_exception_version" 26 | 27 | import sys 28 | import warnings 29 | 30 | from loguru import logger 31 | 32 | # This code is written for Python 3.11 and higher 33 | if sys.version_info.major < 3 and sys.version_info.minor < 11: 34 | logger.error(f"{__package_name__} requires Python 3.11 or higher.") 35 | sys.exit(1) 36 | 37 | # Let users know if they're missing any hard dependencies 38 | hard_dependencies = ("numpy", "pandas", "scipy", "matplotlib", "lmfit", "pydantic") 39 | soft_dependencies = {} 40 | missing_dependencies = [] 41 | 42 | 43 | for dependency in hard_dependencies: 44 | if not importlib.util.find_spec(dependency): 45 | missing_dependencies.append(dependency) 46 | 47 | if missing_dependencies: 48 | raise ImportError(f"Missing required dependencies {missing_dependencies}") 49 | 50 | for dependency in soft_dependencies: 51 | if not importlib.util.find_spec(dependency): 52 | warnings.warn( 53 | f"Missing important package {dependency}. 
{soft_dependencies[dependency]}" 54 | ) 55 | 56 | del hard_dependencies, soft_dependencies, dependency, missing_dependencies 57 | -------------------------------------------------------------------------------- /src/raman_fitting/config/__init__.py: -------------------------------------------------------------------------------- 1 | from raman_fitting.config.base_settings import Settings 2 | 3 | settings = Settings() -------------------------------------------------------------------------------- /src/raman_fitting/config/base_settings.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from pathlib import Path 3 | 4 | from pydantic import ( 5 | Field, 6 | ) 7 | 8 | from pydantic_settings import BaseSettings 9 | 10 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel 11 | from raman_fitting.models.deconvolution.base_model import ( 12 | get_models_and_peaks_from_definitions, 13 | ) 14 | from raman_fitting.models.deconvolution.spectrum_regions import ( 15 | get_default_regions_from_toml_files, 16 | ) 17 | from .default_models import load_config_from_toml_files 18 | from .path_settings import create_default_package_dir_or_ask, InternalPathSettings 19 | from types import MappingProxyType 20 | 21 | 22 | def get_default_models_and_peaks_from_definitions(): 23 | models_and_peaks_definitions = load_config_from_toml_files() 24 | return get_models_and_peaks_from_definitions(models_and_peaks_definitions) 25 | 26 | 27 | class Settings(BaseSettings): 28 | default_models: Dict[str, Dict[str, BaseLMFitModel]] = Field( 29 | default_factory=get_default_models_and_peaks_from_definitions, 30 | alias="my_default_models", 31 | init_var=False, 32 | validate_default=False, 33 | ) 34 | default_regions: Dict[str, Dict[str, float]] | None = Field( 35 | default_factory=get_default_regions_from_toml_files, 36 | alias="my_default_regions", 37 | init_var=False, 38 | validate_default=False, 39 | ) 40 | default_definitions: MappingProxyType | None = Field( 41 | default_factory=load_config_from_toml_files, 42 | alias="my_default_definitions", 43 | init_var=False, 44 | validate_default=False, 45 | ) 46 | 47 | destination_dir: Path = Field(default_factory=create_default_package_dir_or_ask) 48 | internal_paths: InternalPathSettings = Field(default_factory=InternalPathSettings) 49 | -------------------------------------------------------------------------------- /src/raman_fitting/config/default_models/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from types import MappingProxyType 3 | import tomllib 4 | 5 | 6 | def load_config_from_toml_files() -> MappingProxyType: 7 | current_parent_dir = Path(__file__).resolve().parent 8 | default_peak_settings = {} 9 | for i in current_parent_dir.glob("*.toml"): 10 | default_peak_settings.update(tomllib.loads(i.read_bytes().decode())) 11 | if not default_peak_settings: 12 | raise ValueError("default models should not be empty.") 13 | 14 | return MappingProxyType(default_peak_settings) 15 | -------------------------------------------------------------------------------- /src/raman_fitting/config/default_models/first_order.toml: -------------------------------------------------------------------------------- 1 | [first_order] 2 | 3 | [first_order.models] 4 | 1peak = "G" 5 | 2peaks = "G+D" 6 | 3peaks = "G+D+D3" 7 | 4peaks = "G+D+D3+D4" 8 | 5peaks = "G+D+D2+D3+D4" 9 | 6peaks = "G+D+D2+D3+D4+D5" 10 | 11 | [first_order.peaks] 12 
| 13 | [first_order.peaks.G] 14 | docstring = """ 15 | Graphite belongs to the P63/mmc (D46h) space group. If considering only a graphene plane, at 16 | the à point of the Brillouin zone, there are six normal modes that possess only one mode (doubly 17 | degenerate in plane) with a E2g representation, which is Raman active 18 | G ; Ideal graphitic lattice (E2g-symmetry) 19 | G peak center stable over different laser wavelengths. 20 | Influenced by potential, HSO4 adsorption (or ionization of G- and G+), 21 | magnetic fields, pressure 22 | Für G: 1580-1590 D5 und D2 weiß ich nicht 23 | """ 24 | peak_name = "G" 25 | peak_type = "Lorentzian" 26 | [first_order.peaks.G.param_hints] 27 | center = {value = 1571, min = 1545, max = 1595} 28 | sigma = {value = 30, min = 5, max = 150} 29 | amplitude = {value = 100, min = 1e-05, max = 500} 30 | 31 | 32 | [first_order.peaks.D] 33 | docstring = """ 34 | D or D1 ; Disordered graphitic lattice (graphene layer edges,A1gsymmetry) 35 | A defective graphite presents other bands that can be as intense as the G band at D=1350 and D'=1615 cm-1 36 | These bands are activated by defects due to the breaking of the crystal symmetry that relax the Raman selection rules. 37 | Für D1: 1340-1350 38 | """ 39 | peak_name = "D" 40 | peak_type = "Lorentzian" 41 | [first_order.peaks.D.param_hints] 42 | center = {value = 1350, min = 1330, max = 1380} 43 | sigma = {value = 35, min = 1, max = 150} 44 | amplitude = {value = 120, min = 1e-05, max = 500} 45 | 46 | [first_order.peaks.D2] 47 | docstring = """ 48 | D2 or D' ; Right next to the G peak, sometimes not obvious as G peak split. 49 | Disordered graphitic lattice (surface graphene layers,E2g-symmetry) 50 | j.molstruc.2010.12.065 51 | """ 52 | peak_name = "D2" 53 | peak_type = "Lorentzian" 54 | [first_order.peaks.D2.param_hints] 55 | center = {value = 1606, min = 1592, max = 1635} 56 | sigma = {value = 30, min = 5, max = 150} 57 | amplitude = {value = 35, min = 5, max = 500} 58 | 59 | 60 | [first_order.peaks.D3] 61 | docstring = """ 62 | D3 or D'' or A or Am ; Between the D and G peak, sometimes too broad. 63 | For amorphous carbon (Gaussian[26]or Lorentzian[3,18,27]line shape). 64 | Für D3: 1495-1515 65 | """ 66 | peak_name = "D3" 67 | peak_type = "Lorentzian" 68 | [first_order.peaks.D3.param_hints] 69 | center = {value = 1480, min = 1450, max = 1525} 70 | sigma = {value = 25, min = 1, max = 150} 71 | amplitude = {value = 25, min = 1e-02, max = 500} 72 | 73 | [first_order.peaks.D4] 74 | docstring = """ 75 | D4 or I ; Below D band, a shoulder sometimes split with D5 band. 76 | Disordered graphitic lattice (A1gsymmetry)[10],polyenes[3,27], ionic impurities 77 | D4 peak at 1212 cm−1 78 | Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7 79 | Für D4: 1185-1210, but depends on if there is D5 or not. 80 | """ 81 | peak_name = "D4" 82 | peak_type = "Lorentzian" 83 | [first_order.peaks.D4.param_hints] 84 | center = {value = 1230, min = 1180, max = 1310} 85 | sigma = {value = 40, min = 1, max = 150} 86 | amplitude = {value = 20, min = 1e-02, max = 200} 87 | 88 | [first_order.peaks.D5] 89 | docstring = """ 90 | D5 peak at 1110 cm−1. At lowest should of D peak, below D4. 91 | Ref: Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. 
https://doi.org/10.1007/s10853-017-1753-7 92 | """ 93 | peak_name = "D5" 94 | peak_type = "Lorentzian" 95 | [first_order.peaks.D5.param_hints] 96 | center = {value = 1110, min = 1080, max = 1150} 97 | sigma = {value = 40, min = 1, max = 150} 98 | amplitude = {value = 20, min = 1e-02, max = 200} 99 | 100 | [first_order.peaks.Si1] 101 | docstring = """ 102 | ===== Extra peak at ca. 960 cm-1 presumably from Si substrate 2nd order === not from Nafion... 103 | => Either cut the Spectra 1000-2000 104 | => Place an extra Gaussian peak at 960 in the fit 105 | """ 106 | peak_name = "Si1" 107 | peak_type = "Gaussian" 108 | is_substrate = true 109 | [first_order.peaks.Si1.param_hints] 110 | center = {value = 960, min = 900, max = 980} 111 | sigma = {value = 10, min = 0, max = 150} 112 | amplitude = {value = 10, min = 0, max = 200} 113 | -------------------------------------------------------------------------------- /src/raman_fitting/config/default_models/normalization.toml: -------------------------------------------------------------------------------- 1 | [normalization] 2 | 3 | [normalization.models] 4 | norm = "norm_G+norm_D" 5 | 6 | [normalization.peaks] 7 | 8 | [normalization.peaks.norm_G] 9 | docstring = """ 10 | G_peak used for normalization 11 | """ 12 | peak_name = "norm_G" 13 | peak_type = "Lorentzian" 14 | is_for_normalization = true 15 | [normalization.peaks.norm_G.param_hints] 16 | center = {"value" = 1581, "min" = 1500, "max" = 1600} 17 | sigma = {"value" = 40, "min" = 1e-05, "max" = 1e3} 18 | amplitude = {"value" = 8e4, "min" = 1e2} 19 | 20 | [normalization.peaks.norm_D] 21 | docstring = """ 22 | D_peak for normalization 23 | """ 24 | peak_name = "norm_D" 25 | peak_type = "Lorentzian" 26 | is_for_normalization = true 27 | [normalization.peaks.norm_D.param_hints] 28 | center = {"value" = 1350, "min" = 1300, "max" = 1400} 29 | sigma = {"value" = 90, "min" = 1e-05} 30 | amplitude = {"value" = 10e5, "min" = 1e2} 31 | -------------------------------------------------------------------------------- /src/raman_fitting/config/default_models/second_order.toml: -------------------------------------------------------------------------------- 1 | [second_order] 2 | 3 | [second_order.models] 4 | 2nd_4peaks = "D4D4+D1D1+GD1+D2D2" 5 | 6 | [second_order.peaks] 7 | 8 | [second_order.peaks.D4D4] 9 | peak_name = "D4D4" 10 | peak_type = "Lorentzian" 11 | [second_order.peaks.D4D4.param_hints] 12 | center = {value = 2435, min = 2400, max = 2550} 13 | sigma = {value = 30, min = 1, max = 200} 14 | amplitude = {value = 2, min = 1e-03, max = 100} 15 | 16 | [second_order.peaks.D1D1] 17 | peak_name = "D1D1" 18 | peak_type = "Lorentzian" 19 | [second_order.peaks.D1D1.param_hints] 20 | center = {value = 2650, min = 2600, max = 2750} 21 | sigma = {value = 60, min = 1, max = 200} 22 | amplitude = {value = 14, min = 1e-03, max = 100} 23 | 24 | [second_order.peaks.GD1] 25 | peak_name = "GD1" 26 | peak_type = "Lorentzian" 27 | [second_order.peaks.GD1.param_hints] 28 | center = {value = 2900, min = 2800, max = 2950} 29 | sigma = {value = 50, min = 1, max = 200} 30 | amplitude = {value = 10, min = 1e-03, max = 100} 31 | 32 | [second_order.peaks.D2D2] 33 | peak_type = "Lorentzian" 34 | peak_name = "D2D2" 35 | [second_order.peaks.D2D2.param_hints] 36 | center = {value = 3250, min = 3000, max = 3400} 37 | sigma = {value = 60, min = 20, max = 200} 38 | amplitude = {value = 1, min = 1e-03, max = 100} 39 | -------------------------------------------------------------------------------- 
/src/raman_fitting/config/default_models/spectrum_regions.toml: -------------------------------------------------------------------------------- 1 | [spectrum] 2 | 3 | [spectrum.regions] 4 | full = {"min" = 200, "max" = 3600} 5 | full_first_and_second = {"min" = 800, "max" = 3500} 6 | low = {"min" = 150, "max" = 850, "extra_margin" = 10} 7 | first_order = {"min" = 900, "max" = 2000} 8 | mid = {"min" = 1850, "max" = 2150, "extra_margin" = 10} 9 | normalization = {"min" = 1500, "max" = 1675, "extra_margin" = 10} 10 | second_order = {"min" = 2150, "max" = 3380} -------------------------------------------------------------------------------- /src/raman_fitting/config/filepath_helper.py: -------------------------------------------------------------------------------- 1 | """ this module prepares the local file paths for data and results""" 2 | 3 | 4 | from pathlib import Path 5 | 6 | from loguru import logger 7 | 8 | 9 | def check_and_make_dirs(destdir: Path) -> None: 10 | _destfile = None 11 | if destdir.suffix: 12 | _destfile = destdir 13 | destdir = _destfile.parent 14 | 15 | if not destdir.is_dir(): 16 | destdir.mkdir(exist_ok=True, parents=True) 17 | logger.info( 18 | f"check_and_make_dirs the results directory did not exist and was created at:\n{destdir}\n" 19 | ) 20 | 21 | if _destfile: 22 | _destfile.touch() 23 | 24 | 25 | def create_dir_or_ask_user_input(destdir: Path, ask_user=True): 26 | counter, max_attempts = 0, 10 27 | while not destdir.exists() and counter < max_attempts: 28 | answer = "y" 29 | if ask_user: 30 | answer = input( 31 | f"Directory to store files raman_fitting:\n{destdir}\nCan this be folder be created? (y/n)" 32 | ) 33 | if "y" in answer.lower(): 34 | destdir.mkdir(exist_ok=True, parents=True) 35 | 36 | if "y" not in answer.lower(): 37 | new_path_user = input( 38 | "Please provide the directory to store files raman_fitting:" 39 | ) 40 | try: 41 | new_path = Path(new_path_user).resolve() 42 | except Exception as e: 43 | print(f"Exception: {e}") 44 | counter += 1 45 | destdir = new_path 46 | 47 | logger.info(f"Directory created: {destdir}") 48 | return destdir 49 | -------------------------------------------------------------------------------- /src/raman_fitting/config/logging_config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | # Multiple calls to logging.getLogger('someLogger') return a 5 | # reference to the same logger object. This is true not only 6 | # within the same module, but also across modules as long as 7 | # it is in the same Python interpreter process. 
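# A minimal illustration of the point above (not used by this module):
#
#     import logging
#     a = logging.getLogger("someLogger")
#     b = logging.getLogger("someLogger")
#     assert a is b  # both calls return the very same logger object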
8 | 9 | FORMATTER = logging.Formatter( 10 | "%(asctime)s — %(name)s — %(levelname)s —%(funcName)s:%(lineno)d — %(message)s" 11 | ) 12 | 13 | 14 | log_format = ( 15 | "[%(asctime)s] — %(name)s — %(levelname)s —" 16 | "%(funcName)s:%(lineno)d—12s %(message)s" 17 | ) 18 | # '[%(asctime)s] %(levelname)-8s %(name)-12s %(message)s') 19 | 20 | # Define basic configuration 21 | logging.basicConfig( 22 | # Define logging level 23 | level=logging.DEBUG, 24 | # Define the format of log messages 25 | format=log_format, 26 | # Provide the filename to store the log messages 27 | filename=("debug.log"), 28 | ) 29 | 30 | 31 | def get_console_handler(): 32 | console_handler = logging.StreamHandler(sys.stdout) 33 | console_handler.setFormatter(FORMATTER) 34 | return console_handler 35 | -------------------------------------------------------------------------------- /src/raman_fitting/config/path_settings.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import tempfile 3 | from enum import StrEnum, auto 4 | 5 | 6 | from pydantic import ( 7 | BaseModel, 8 | DirectoryPath, 9 | FilePath, 10 | ConfigDict, 11 | Field, 12 | model_validator, 13 | ) 14 | 15 | 16 | from .filepath_helper import check_and_make_dirs 17 | 18 | 19 | PACKAGE_NAME = "raman_fitting" 20 | CURRENT_FILE: Path = Path(__file__).resolve() 21 | PACKAGE_ROOT: Path = CURRENT_FILE.parent.parent 22 | REPO_ROOT: Path = PACKAGE_ROOT.parent 23 | INTERNAL_DEFAULT_MODELS: Path = CURRENT_FILE.parent / "default_models" 24 | # MODEL_DIR: Path = PACKAGE_ROOT / "deconvolution_models" 25 | INTERNAL_EXAMPLE_FIXTURES: Path = PACKAGE_ROOT / "example_fixtures" 26 | INTERNAL_PYTEST_FIXTURES: Path = REPO_ROOT / "tests" / "test_fixtures" 27 | 28 | # Home dir from pathlib.Path for storing the results 29 | USER_HOME_PACKAGE: Path = Path.home() / PACKAGE_NAME 30 | # pyramdeconv is the new version package name 31 | 32 | # Optional local configuration file 33 | USER_LOCAL_CONFIG_FILE: Path = USER_HOME_PACKAGE / f"{PACKAGE_NAME}/toml" 34 | 35 | INDEX_FILE_NAME = f"{PACKAGE_NAME}_index.csv" 36 | # Storage file of the index 37 | USER_INDEX_FILE_PATH: Path = USER_HOME_PACKAGE / INDEX_FILE_NAME 38 | 39 | TEMP_DIR = Path(tempfile.mkdtemp(prefix="raman-fitting-")) 40 | TEMP_RESULTS_DIR: Path = TEMP_DIR / "results" 41 | 42 | CLEAN_SPEC_REGION_NAME_PREFIX = "savgol_filter_raw_region_" 43 | 44 | ERROR_MSG_TEMPLATE = "{sample_group} {sampleid}: {msg}" 45 | 46 | 47 | class InternalPathSettings(BaseModel): 48 | settings_file: FilePath = Field(CURRENT_FILE) 49 | package_root: DirectoryPath = Field(PACKAGE_ROOT) 50 | default_models_dir: DirectoryPath = Field(INTERNAL_DEFAULT_MODELS) 51 | example_fixtures: DirectoryPath = Field(INTERNAL_EXAMPLE_FIXTURES) 52 | pytest_fixtures: DirectoryPath = Field(INTERNAL_PYTEST_FIXTURES) 53 | temp_dir: DirectoryPath = Field(TEMP_RESULTS_DIR) 54 | temp_index_file: FilePath = Field(TEMP_DIR / INDEX_FILE_NAME) 55 | 56 | 57 | EXPORT_FOLDER_NAMES = { 58 | "plots": "fitting_plots", 59 | "components": "fitting_components", 60 | "raw_data": "raw_data", 61 | } 62 | 63 | 64 | class RunModes(StrEnum): 65 | NORMAL = auto() 66 | PYTEST = auto() 67 | EXAMPLES = auto() 68 | DEBUG = auto() 69 | 70 | 71 | def get_run_mode_paths(run_mode: RunModes, user_package_home: Path = None): 72 | if user_package_home is None: 73 | user_package_home = USER_HOME_PACKAGE 74 | if isinstance(run_mode, str): 75 | run_mode = RunModes(run_mode) 76 | 77 | RUN_MODE_PATHS = { 78 | RunModes.PYTEST.name: { 79 | "RESULTS_DIR": 
TEMP_RESULTS_DIR, 80 | "DATASET_DIR": INTERNAL_EXAMPLE_FIXTURES, 81 | "USER_CONFIG_FILE": INTERNAL_EXAMPLE_FIXTURES / f"{PACKAGE_NAME}.toml", 82 | "INDEX_FILE": TEMP_RESULTS_DIR / f"{PACKAGE_NAME}_index.csv", 83 | }, 84 | RunModes.EXAMPLES.name: { 85 | "RESULTS_DIR": user_package_home / "examples", 86 | "DATASET_DIR": INTERNAL_EXAMPLE_FIXTURES, 87 | "USER_CONFIG_FILE": INTERNAL_EXAMPLE_FIXTURES / f"{PACKAGE_NAME}.toml", 88 | "INDEX_FILE": user_package_home / "examples" / f"{PACKAGE_NAME}_index.csv", 89 | }, 90 | RunModes.NORMAL.name: { 91 | "RESULTS_DIR": user_package_home / "results", 92 | "DATASET_DIR": user_package_home / "datafiles", 93 | "USER_CONFIG_FILE": user_package_home / "raman_fitting.toml", 94 | "INDEX_FILE": user_package_home / f"{PACKAGE_NAME}_index.csv", 95 | }, 96 | } 97 | if run_mode.name not in RUN_MODE_PATHS: 98 | raise ValueError(f"Choice of run_mode {run_mode.name} not supported.") 99 | return RUN_MODE_PATHS[run_mode.name] 100 | 101 | 102 | class ExportPathSettings(BaseModel): 103 | results_dir: Path 104 | plots: DirectoryPath = Field(None, validate_default=False) 105 | components: DirectoryPath = Field(None, validate_default=False) 106 | raw_data: DirectoryPath = Field(None, validate_default=False) 107 | 108 | @model_validator(mode="after") 109 | def set_export_path_settings(self) -> "ExportPathSettings": 110 | if not self.results_dir.is_dir(): 111 | self.results_dir.mkdir(exist_ok=True, parents=True) 112 | 113 | plots: DirectoryPath = self.results_dir.joinpath(EXPORT_FOLDER_NAMES["plots"]) 114 | self.plots = plots 115 | components: DirectoryPath = self.results_dir.joinpath( 116 | EXPORT_FOLDER_NAMES["components"] 117 | ) 118 | self.components = components 119 | raw_data: DirectoryPath = self.results_dir.joinpath( 120 | EXPORT_FOLDER_NAMES["raw_data"] 121 | ) 122 | self.raw_data = raw_data 123 | return self 124 | 125 | 126 | class RunModePaths(BaseModel): 127 | model_config = ConfigDict(alias_generator=str.upper) 128 | 129 | run_mode: RunModes 130 | results_dir: DirectoryPath 131 | dataset_dir: DirectoryPath 132 | user_config_file: Path 133 | index_file: Path 134 | 135 | 136 | def initialize_run_mode_paths( 137 | run_mode: RunModes, user_package_home: Path = None 138 | ) -> RunModePaths: 139 | run_mode_paths = get_run_mode_paths(run_mode, user_package_home=user_package_home) 140 | 141 | for destname, destdir in run_mode_paths.items(): 142 | destdir = Path(destdir) 143 | check_and_make_dirs(destdir) 144 | return RunModePaths(RUN_MODE=run_mode, **run_mode_paths) 145 | 146 | 147 | def create_default_package_dir_or_ask(): 148 | return USER_HOME_PACKAGE 149 | -------------------------------------------------------------------------------- /src/raman_fitting/delegating/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/delegating/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/delegating/main_delegator.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402 2 | from dataclasses import dataclass, field 3 | from typing import Dict, List, Sequence, Any 4 | 5 | from raman_fitting.config.path_settings import ( 6 | RunModes, 7 | ERROR_MSG_TEMPLATE, 8 | initialize_run_mode_paths, 9 | ) 10 | from raman_fitting.config import settings 11 | 12 | from 
raman_fitting.imports.models import RamanFileInfo 13 | 14 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel 15 | from raman_fitting.models.splitter import RegionNames 16 | from raman_fitting.exports.exporter import ExportManager 17 | from raman_fitting.imports.files.file_indexer import ( 18 | RamanFileIndex, 19 | groupby_sample_group, 20 | groupby_sample_id, 21 | IndexSelector, 22 | initialize_index_from_source_files, 23 | ) 24 | 25 | from raman_fitting.delegating.models import ( 26 | AggregatedSampleSpectrumFitResult, 27 | ) 28 | from raman_fitting.delegating.pre_processing import ( 29 | prepare_aggregated_spectrum_from_files, 30 | ) 31 | from raman_fitting.types import LMFitModelCollection 32 | from raman_fitting.delegating.run_fit_spectrum import run_fit_over_selected_models 33 | 34 | 35 | from loguru import logger 36 | 37 | 38 | @dataclass 39 | class MainDelegator: 40 | # IDEA Add flexible input handling for the cli, such a path to dir, or list of files 41 | # or create index when no kwargs are given. 42 | """ 43 | Main delegator for the processing of files containing Raman spectra. 44 | 45 | Creates plots and files in the config RESULTS directory. 46 | """ 47 | 48 | run_mode: RunModes 49 | use_multiprocessing: bool = False 50 | lmfit_models: LMFitModelCollection = field( 51 | default_factory=lambda: settings.default_models 52 | ) 53 | fit_model_region_names: Sequence[RegionNames] = field( 54 | default=(RegionNames.first_order, RegionNames.second_order) 55 | ) 56 | fit_model_specific_names: Sequence[str] | None = None 57 | sample_ids: Sequence[str] = field(default_factory=list) 58 | sample_groups: Sequence[str] = field(default_factory=list) 59 | index: RamanFileIndex = None 60 | selection: Sequence[RamanFileInfo] = field(init=False) 61 | selected_models: Sequence[RamanFileInfo] = field(init=False) 62 | 63 | results: Dict[str, Any] | None = field(default=None, init=False) 64 | export: bool = True 65 | 66 | def __post_init__(self): 67 | run_mode_paths = initialize_run_mode_paths(self.run_mode) 68 | if self.index is None: 69 | raman_files = run_mode_paths.dataset_dir.glob("*.txt") 70 | index_file = run_mode_paths.index_file 71 | self.index = initialize_index_from_source_files( 72 | files=raman_files, index_file=index_file, force_reindex=True 73 | ) 74 | 75 | self.selection = self.select_samples_from_index() 76 | self.selected_models = self.select_models_from_provided_models() 77 | self.main_run() 78 | if self.export: 79 | self.exports = self.call_export_manager() 80 | 81 | def select_samples_from_index(self) -> Sequence[RamanFileInfo]: 82 | index = self.index 83 | # breakpoint() 84 | index_selector = IndexSelector( 85 | **dict( 86 | raman_files=index.raman_files, 87 | sample_groups=self.sample_groups, 88 | sample_ids=self.sample_ids, 89 | ) 90 | ) 91 | selection = index_selector.selection 92 | if not selection: 93 | logger.info("Selection was empty.") 94 | return selection 95 | 96 | def call_export_manager(self): 97 | # breakpoint() 98 | export = ExportManager(self.run_mode, self.results) 99 | exports = export.export_files() 100 | return exports 101 | 102 | # region_names:List[RegionNames], model_names: List[str] 103 | def select_models_from_provided_models(self) -> LMFitModelCollection: 104 | selected_region_names = self.fit_model_region_names 105 | selected_model_names = self.fit_model_specific_names 106 | selected_models = {} 107 | for region_name, all_region_models in self.lmfit_models.items(): 108 | if region_name not in selected_region_names: 109 | continue 
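            # When no specific model names are requested, every model registered for
            # this region is kept; otherwise only the models named in
            # fit_model_specific_names are copied into the selection for this region.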
110 | if not selected_model_names: 111 | selected_models[region_name] = all_region_models 112 | continue 113 | selected_region_models = {} 114 | for mod_name, mod_val in all_region_models.items(): 115 | if mod_name not in selected_model_names: 116 | continue 117 | selected_region_models[mod_name] = mod_val 118 | 119 | selected_models[region_name] = selected_region_models 120 | return selected_models 121 | 122 | def select_fitting_model( 123 | self, region_name: RegionNames, model_name: str 124 | ) -> BaseLMFitModel: 125 | try: 126 | return self.lmfit_models[region_name][model_name] 127 | except KeyError as exc: 128 | raise KeyError(f"Model {region_name} {model_name} not found.") from exc 129 | 130 | def main_run(self): 131 | selection = self.select_samples_from_index() 132 | if not self.fit_model_region_names: 133 | logger.info("No model region names were selected.") 134 | if not self.selected_models: 135 | logger.info("No fit models were selected.") 136 | 137 | results = {} 138 | 139 | for group_name, grp in groupby_sample_group(selection): 140 | results[group_name] = {} 141 | for sample_id, sample_grp in groupby_sample_id(grp): 142 | sgrp = list(sample_grp) 143 | results[group_name][sample_id] = {} 144 | _error_msg = None 145 | 146 | if not sgrp: 147 | _err = "group is empty" 148 | _error_msg = ERROR_MSG_TEMPLATE.format(group_name, sample_id, _err) 149 | logger.debug(_error_msg) 150 | results[group_name][sample_id]["errors"] = _error_msg 151 | continue 152 | 153 | unique_positions = {i.sample.position for i in sgrp} 154 | if len(unique_positions) <= len(sgrp): 155 | # handle edge-case, multiple source files for a single position on a sample 156 | _error_msg = f"Handle multiple source files for a single position on a sample, {group_name} {sample_id}" 157 | results[group_name][sample_id]["errors"] = _error_msg 158 | logger.debug(_error_msg) 159 | model_result = run_fit_over_selected_models( 160 | sgrp, 161 | self.selected_models, 162 | use_multiprocessing=self.use_multiprocessing, 163 | ) 164 | results[group_name][sample_id]["fit_results"] = model_result 165 | self.results = results 166 | 167 | 168 | def get_results_over_selected_models( 169 | raman_files: List[RamanFileInfo], models: LMFitModelCollection, fit_model_results 170 | ) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]: 171 | results = {} 172 | for region_name, region_grp in models.items(): 173 | aggregated_spectrum = prepare_aggregated_spectrum_from_files( 174 | region_name, raman_files 175 | ) 176 | if aggregated_spectrum is None: 177 | continue 178 | fit_region_results = AggregatedSampleSpectrumFitResult( 179 | region_name=region_name, 180 | aggregated_spectrum=aggregated_spectrum, 181 | fit_model_results=fit_model_results, 182 | ) 183 | results[region_name] = fit_region_results 184 | return results 185 | 186 | 187 | def make_examples(): 188 | # breakpoint() 189 | _main_run = MainDelegator( 190 | run_mode="pytest", fit_model_specific_names=["2peaks", "3peaks", "2nd_4peaks"] 191 | ) 192 | _main_run.main_run() 193 | return _main_run 194 | 195 | 196 | if __name__ == "__main__": 197 | example_run = make_examples() 198 | -------------------------------------------------------------------------------- /src/raman_fitting/delegating/models.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402 2 | from typing import Dict, Sequence 3 | 4 | from pydantic import BaseModel 5 | 6 | from raman_fitting.imports.models import RamanFileInfo 7 | 
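# The models in this module chain the import pipeline together: PreparedSampleSpectrum
# ties a source file to its raw reader and processed spectrum, AggregatedSampleSpectrum
# holds the mean spectrum over those sources, and AggregatedSampleSpectrumFitResult adds
# the per-model fit results for one spectral region.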
8 | from raman_fitting.models.spectrum import SpectrumData 9 | from raman_fitting.models.fit_models import SpectrumFitModel 10 | from raman_fitting.models.splitter import RegionNames 11 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader 12 | from raman_fitting.processing.post_processing import SpectrumProcessor 13 | 14 | 15 | class PreparedSampleSpectrum(BaseModel): 16 | file_info: RamanFileInfo 17 | read: SpectrumReader 18 | processed: SpectrumProcessor 19 | 20 | 21 | class AggregatedSampleSpectrum(BaseModel): 22 | sources: Sequence[PreparedSampleSpectrum] 23 | spectrum: SpectrumData 24 | 25 | 26 | class AggregatedSampleSpectrumFitResult(BaseModel): 27 | region_name: RegionNames 28 | aggregated_spectrum: AggregatedSampleSpectrum 29 | fit_model_results: Dict[str, SpectrumFitModel] 30 | -------------------------------------------------------------------------------- /src/raman_fitting/delegating/pre_processing.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from raman_fitting.models.splitter import RegionNames 4 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader 5 | from raman_fitting.processing.post_processing import SpectrumProcessor 6 | from raman_fitting.imports.models import RamanFileInfo 7 | from .models import ( 8 | AggregatedSampleSpectrum, 9 | PreparedSampleSpectrum, 10 | ) 11 | 12 | from loguru import logger 13 | 14 | from raman_fitting.config.path_settings import CLEAN_SPEC_REGION_NAME_PREFIX 15 | from ..imports.spectrum.spectra_collection import SpectraDataCollection 16 | 17 | 18 | def prepare_aggregated_spectrum_from_files( 19 | region_name: RegionNames, raman_files: List[RamanFileInfo] 20 | ) -> AggregatedSampleSpectrum | None: 21 | select_region_key = f"{CLEAN_SPEC_REGION_NAME_PREFIX}{region_name}" 22 | clean_data_for_region = [] 23 | data_sources = [] 24 | for i in raman_files: 25 | read = SpectrumReader(i.file) 26 | processed = SpectrumProcessor(read.spectrum) 27 | prepared_spec = PreparedSampleSpectrum( 28 | file_info=i, read=read, processed=processed 29 | ) 30 | data_sources.append(prepared_spec) 31 | selected_clean_data = processed.clean_spectrum.spec_regions[select_region_key] 32 | clean_data_for_region.append(selected_clean_data) 33 | if not clean_data_for_region: 34 | logger.warning( 35 | f"prepare_mean_data_for_fitting received no files. 
{region_name}" 36 | ) 37 | return 38 | spectra_collection = SpectraDataCollection( 39 | spectra=clean_data_for_region, region_name=region_name 40 | ) 41 | aggregated_spectrum = AggregatedSampleSpectrum( 42 | sources=data_sources, spectrum=spectra_collection.mean_spectrum 43 | ) 44 | return aggregated_spectrum 45 | -------------------------------------------------------------------------------- /src/raman_fitting/delegating/run_fit_multi.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | from loguru import logger 4 | from mpire import WorkerPool 5 | 6 | from raman_fitting.models.fit_models import SpectrumFitModel 7 | 8 | 9 | def run_fit_multi(**kwargs) -> SpectrumFitModel: 10 | # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults 11 | spectrum = kwargs.pop("spectrum") 12 | model = kwargs.pop("model") 13 | lmfit_model = model["lmfit_model"] 14 | region = kwargs.pop("region") 15 | import time 16 | 17 | lmfit_kwargs = {} 18 | if "method" not in kwargs: 19 | lmfit_kwargs["method"] = "leastsq" 20 | 21 | init_params = lmfit_model.make_params() 22 | start_time = time.time() 23 | x, y = spectrum["ramanshift"], spectrum["intensity"] 24 | out = lmfit_model.fit(y, init_params, x=x, **lmfit_kwargs) # 'leastsq' 25 | end_time = time.time() 26 | elapsed_seconds = abs(start_time - end_time) 27 | elapsed_time = elapsed_seconds 28 | logger.debug( 29 | f"Fit with model {model['name']} on {region} success: {out.success} in {elapsed_time:.2f}s." 30 | ) 31 | return out 32 | 33 | 34 | def run_fit_multiprocessing( 35 | spec_fits: List[SpectrumFitModel], 36 | ) -> Dict[str, SpectrumFitModel]: 37 | spec_fits_dumps = [i.model_dump() for i in spec_fits] 38 | 39 | with WorkerPool(n_jobs=4, use_dill=True) as pool: 40 | results = pool.map( 41 | run_fit_multi, spec_fits_dumps, progress_bar=True, progress_bar_style="rich" 42 | ) 43 | # patch spec_fits, setattr fit_result 44 | fit_model_results = {} 45 | for result in results: 46 | _spec_fit_search = [ 47 | i for i in spec_fits if i.model.lmfit_model.name == result.model.name 48 | ] 49 | if len(_spec_fit_search) != 1: 50 | continue 51 | _spec_fit = _spec_fit_search[0] 52 | _spec_fit.fit_result = result 53 | fit_model_results[_spec_fit.model.name] = _spec_fit 54 | return fit_model_results 55 | -------------------------------------------------------------------------------- /src/raman_fitting/delegating/run_fit_spectrum.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | 3 | from raman_fitting.delegating.run_fit_multi import run_fit_multiprocessing 4 | from raman_fitting.models.spectrum import SpectrumData 5 | from raman_fitting.types import LMFitModelCollection 6 | from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult 7 | from raman_fitting.delegating.pre_processing import ( 8 | prepare_aggregated_spectrum_from_files, 9 | ) 10 | from raman_fitting.imports.models import RamanFileInfo 11 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames 12 | from raman_fitting.models.fit_models import SpectrumFitModel 13 | 14 | from loguru import logger 15 | 16 | 17 | def run_fit_over_selected_models( 18 | raman_files: List[RamanFileInfo], 19 | models: LMFitModelCollection, 20 | use_multiprocessing: bool = False, 21 | ) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]: 22 | results = {} 23 | for region_name, model_region_grp in models.items(): 24 | 
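        # Per region: average the source spectra into one aggregated spectrum, build a
        # SpectrumFitModel for every selected lmfit model, then run the fits either in a
        # multiprocessing worker pool or sequentially in a plain loop.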
aggregated_spectrum = prepare_aggregated_spectrum_from_files( 25 | region_name, raman_files 26 | ) 27 | if aggregated_spectrum is None: 28 | continue 29 | spec_fits = prepare_spec_fit_regions( 30 | aggregated_spectrum.spectrum, model_region_grp 31 | ) 32 | if use_multiprocessing: 33 | fit_model_results = run_fit_multiprocessing(spec_fits) 34 | else: 35 | fit_model_results = run_fit_loop(spec_fits) 36 | fit_region_results = AggregatedSampleSpectrumFitResult( 37 | region_name=region_name, 38 | aggregated_spectrum=aggregated_spectrum, 39 | fit_model_results=fit_model_results, 40 | ) 41 | results[region_name] = fit_region_results 42 | return results 43 | 44 | 45 | def prepare_spec_fit_regions( 46 | spectrum: SpectrumData, model_region_grp 47 | ) -> List[SpectrumFitModel]: 48 | spec_fits = [] 49 | for model_name, model in model_region_grp.items(): 50 | region = model.region_name.name 51 | spec_fit = SpectrumFitModel(spectrum=spectrum, model=model, region=region) 52 | spec_fits.append(spec_fit) 53 | return spec_fits 54 | 55 | 56 | def run_fit_loop(spec_fits: List[SpectrumFitModel]) -> Dict[str, SpectrumFitModel]: 57 | fit_model_results = {} 58 | for spec_fit in spec_fits: 59 | # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults 60 | spec_fit.run_fit() 61 | logger.debug( 62 | f"Fit with model {spec_fit.model.name} on {spec_fit.region} success: {spec_fit.fit_result.success} in {spec_fit.elapsed_time:.2f}s." 63 | ) 64 | fit_model_results[spec_fit.model.name] = spec_fit 65 | return fit_model_results 66 | -------------------------------------------------------------------------------- /src/raman_fitting/example_fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/example_fixtures/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/exports/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /src/raman_fitting/exports/exporter.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, Any 3 | from raman_fitting.config.path_settings import ( 4 | RunModes, 5 | initialize_run_mode_paths, 6 | ExportPathSettings, 7 | ) 8 | from raman_fitting.config import settings 9 | 10 | from raman_fitting.exports.plotting_fit_results import fit_spectrum_plot 11 | from raman_fitting.exports.plotting_raw_data import raw_data_spectra_plot 12 | 13 | 14 | from loguru import logger 15 | 16 | 17 | class ExporterError(Exception): 18 | """Error occured during the exporting functions""" 19 | 20 | 21 | @dataclass 22 | class ExportManager: 23 | run_mode: RunModes 24 | results: Dict[str, Any] | None = None 25 | 26 | def __post_init__(self): 27 | self.paths = initialize_run_mode_paths( 28 | self.run_mode, user_package_home=settings.destination_dir 29 | ) 30 | 31 | def export_files(self): 32 | # breakpoint() self.results 33 | exports = [] 34 | for group_name, group_results in self.results.items(): 35 | for sample_id, sample_results in group_results.items(): 36 | export_dir = self.paths.results_dir / group_name / sample_id 37 | export_paths = ExportPathSettings(results_dir=export_dir) 38 | try: 39 | raw_data_spectra_plot( 40 | 
sample_results["fit_results"], export_paths=export_paths 41 | ) 42 | except Exception as exc: 43 | logger.error(f"Plotting error, raw_data_spectra_plot: {exc}") 44 | try: 45 | fit_spectrum_plot( 46 | sample_results["fit_results"], export_paths=export_paths 47 | ) 48 | except Exception as exc: 49 | logger.error(f"plotting error fit_spectrum_plot: {exc}") 50 | raise exc from exc 51 | exports.append( 52 | { 53 | "sample": sample_results["fit_results"], 54 | "export_paths": export_paths, 55 | } 56 | ) 57 | return exports 58 | -------------------------------------------------------------------------------- /src/raman_fitting/exports/file_table.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from raman_fitting.models.spectrum import SpectrumData 4 | 5 | 6 | def raw_data_spectra_export(spectra: List[SpectrumData]): 7 | try: 8 | for spec in spectra: 9 | wnxl_outpath_spectra = spec.mean_info.DestRaw.unique()[0].joinpath( 10 | f"spectra_{spec.sIDmean_col}_{spec.regionname}.xlsx" 11 | ) 12 | spec.mean_spec.to_excel(wnxl_outpath_spectra) 13 | 14 | _0_spec = spectra[0] 15 | wnxl_outpath_info = _0_spec.mean_info.DestRaw.unique()[0].joinpath( 16 | f"info_{_0_spec.sIDmean_col}.xlsx" 17 | ) 18 | _0_spec.mean_info.to_excel(wnxl_outpath_info) 19 | except Exception as e: 20 | print("no extra Raw Data plots: {0}".format(e)) 21 | 22 | 23 | def export_xls_from_spec(self, res_peak_spec): 24 | try: 25 | res_peak_spec.FitComponents.to_excel( 26 | res_peak_spec.extrainfo["DestFittingModel"].with_suffix(".xlsx"), 27 | index=False, 28 | ) 29 | 30 | except Exception as e: 31 | print("Error export_xls_from_spec", e) 32 | -------------------------------------------------------------------------------- /src/raman_fitting/exports/plot_formatting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Apr 28 15:08:26 2021 5 | 6 | @author: zmg 7 | """ 8 | 9 | from collections import namedtuple 10 | from typing import Sequence, Tuple 11 | 12 | from raman_fitting.models.splitter import RegionNames 13 | 14 | import matplotlib.pyplot as plt 15 | from lmfit import Model as LMFitModel 16 | 17 | from loguru import logger 18 | 19 | 20 | CMAP_OPTIONS_DEFAULT = ("Dark2", "tab20") 21 | DEFAULT_COLOR = (0.4, 0.4, 0.4, 1.0) 22 | COLOR_BLACK = (0, 0, 0, 1) # black as fallback default color 23 | 24 | ModelValidation = namedtuple("ModelValidation", "valid peak_group model_inst message") 25 | 26 | 27 | PLOT_REGION_AXES = { 28 | RegionNames.full: (0, 0), 29 | RegionNames.low: (0, 1), 30 | RegionNames.first_order: (0, 2), 31 | RegionNames.mid: (1, 1), 32 | RegionNames.second_order: (1, 2), 33 | RegionNames.normalization: (1, 0), 34 | } 35 | 36 | 37 | class PeakValidationWarning(UserWarning): 38 | pass 39 | 40 | 41 | class NotFoundAnyModelsWarning(PeakValidationWarning): 42 | pass 43 | 44 | 45 | class CanNotInitializeModelWarning(PeakValidationWarning): 46 | pass 47 | 48 | 49 | def get_cmap_list( 50 | length: int, 51 | cmap_options: Tuple = CMAP_OPTIONS_DEFAULT, 52 | default_color: Tuple = DEFAULT_COLOR, 53 | ) -> Tuple | None: 54 | lst = list(range(length)) 55 | if not lst: 56 | return None 57 | 58 | # set fallback color from class 59 | if isinstance(default_color, tuple) and default_color is not None: 60 | if len(default_color) == 4: 61 | cmap = [default_color for _ in lst] 62 | return cmap 63 | elif default_color is None: 64 | cmap = [DEFAULT_COLOR for _ in 
lst] 65 | else: 66 | raise ValueError(f"default color is not tuple but {type(default_color)}") 67 | 68 | # set cmap colors from cmap options 69 | if cmap_options: 70 | try: 71 | pltcmaps = [plt.get_cmap(cmap) for cmap in cmap_options] 72 | # Take shortest colormap but not 73 | cmap = min( 74 | [i for i in pltcmaps if len(lst) <= len(i.colors)], 75 | key=lambda x: len(x.colors), 76 | default=cmap, 77 | ) 78 | # if succesfull 79 | if "ListedColormap" in str(type(cmap)): 80 | cmap = cmap.colors 81 | 82 | except Exception as exc: 83 | logger.warning(f"get_cmap_list error setting cmap colors:{exc}") 84 | 85 | return cmap 86 | 87 | 88 | def assign_colors_to_peaks(selected_models: Sequence[LMFitModel]) -> dict: 89 | cmap_get = get_cmap_list(len(selected_models)) 90 | annotated_models = {} 91 | for n, peak in enumerate(selected_models): 92 | color = ", ".join([str(i) for i in cmap_get[n]]) 93 | lenpars = len(peak.param_names) 94 | res = {"index": n, "color": color, "lenpars": lenpars, "peak": peak} 95 | annotated_models[peak.prefix] = res 96 | return annotated_models 97 | 98 | 99 | def __repr__(self): 100 | _repr = "Validated Peak model collection" 101 | if self.selected_models: 102 | _selmods = f", {len(self.selected_models)} models from: " + "\n\t- " 103 | _repr += _selmods 104 | _joinmods = "\n\t- ".join( 105 | [f"{i.peak_group}: {i.model_inst} \t" for i in self.selected_models] 106 | ) 107 | _repr += _joinmods 108 | else: 109 | _repr += ", empty selected models" 110 | return _repr 111 | -------------------------------------------------------------------------------- /src/raman_fitting/exports/plotting_fit_results.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | from matplotlib import gridspec 6 | from matplotlib.axes import Axes 7 | 8 | from matplotlib.text import Text 9 | from matplotlib.ticker import AutoMinorLocator 10 | 11 | from raman_fitting.imports.samples.models import SampleMetaData 12 | from raman_fitting.models.fit_models import SpectrumFitModel 13 | 14 | 15 | from raman_fitting.config.path_settings import ExportPathSettings 16 | from raman_fitting.models.splitter import RegionNames 17 | from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult 18 | 19 | from loguru import logger 20 | 21 | 22 | matplotlib.rcParams.update({"font.size": 14}) 23 | FIT_REPORT_MIN_CORREL = 0.7 24 | 25 | 26 | def fit_spectrum_plot( 27 | aggregated_spectra: Dict[RegionNames, AggregatedSampleSpectrumFitResult], 28 | export_paths: ExportPathSettings | None = None, 29 | plot_annotation=True, 30 | plot_residuals=True, 31 | ): # pragma: no cover 32 | first_order = aggregated_spectra[RegionNames.first_order] 33 | second_order = aggregated_spectra[RegionNames.second_order] 34 | 35 | sources = first_order.aggregated_spectrum.sources 36 | sample = sources[0].file_info.sample 37 | second_model_name = "2nd_4peaks" 38 | second_model = second_order.fit_model_results.get(second_model_name) 39 | for first_model_name, first_model in first_order.fit_model_results.items(): 40 | prepare_combined_spectrum_fit_result_plot( 41 | first_model, 42 | second_model, 43 | sample, 44 | export_paths, 45 | plot_annotation=plot_annotation, 46 | plot_residuals=plot_residuals, 47 | ) 48 | 49 | 50 | def prepare_combined_spectrum_fit_result_plot( 51 | first_model: SpectrumFitModel, 52 | second_model: SpectrumFitModel, 53 | sample: SampleMetaData, 54 | export_paths: ExportPathSettings, 55 | 
plot_annotation=True, 56 | plot_residuals=True, 57 | ): 58 | plt.figure(figsize=(28, 24)) 59 | gs = gridspec.GridSpec(4, 1, height_ratios=[4, 1, 4, 1]) 60 | ax = plt.subplot(gs[0]) 61 | ax_res = plt.subplot(gs[1]) 62 | ax.set_title(f"{sample.id}") 63 | 64 | first_model_name = first_model.model.name 65 | 66 | fit_plot_first(ax, ax_res, first_model, plot_residuals=plot_residuals) 67 | _bbox_artists = None 68 | if plot_annotation: 69 | annotate_report_first = prepare_annotate_fit_report_first( 70 | ax, first_model.fit_result 71 | ) 72 | _bbox_artists = (annotate_report_first,) 73 | 74 | if second_model is not None: 75 | ax2nd = plt.subplot(gs[2]) 76 | ax2nd_res = plt.subplot(gs[3]) 77 | fit_plot_second(ax2nd, ax2nd_res, second_model, plot_residuals=plot_residuals) 78 | if plot_annotation: 79 | annotate_report_second = prepare_annotate_fit_report_second( 80 | ax2nd, second_model.fit_result 81 | ) 82 | if annotate_report_second is not None: 83 | _bbox_artists = (annotate_report_first, annotate_report_second) 84 | 85 | # set axes labels and legend 86 | set_axes_labels_and_legend(ax) 87 | 88 | plot_special_si_components(ax, first_model) 89 | if export_paths is not None: 90 | savepath = export_paths.plots.joinpath(f"Model_{first_model_name}").with_suffix( 91 | ".png" 92 | ) 93 | plt.savefig( 94 | savepath, 95 | dpi=100, 96 | bbox_extra_artists=_bbox_artists, 97 | bbox_inches="tight", 98 | ) 99 | logger.debug(f"Plot saved to {savepath}") 100 | plt.close() 101 | 102 | 103 | def fit_plot_first( 104 | ax, ax_res, first_model: SpectrumFitModel, plot_residuals: bool = True 105 | ) -> matplotlib.text.Text | None: 106 | first_result = first_model.fit_result 107 | first_components = first_model.fit_result.components 108 | first_eval_comps = first_model.fit_result.eval_components() 109 | first_model_name = first_model.model.name 110 | 111 | ax.grid(True, "both") 112 | ax_res.grid(True, "both") 113 | ax.get_yaxis().set_tick_params(direction="in") 114 | ax.get_xaxis().set_tick_params(direction="in") 115 | 116 | ax.xaxis.set_minor_locator(AutoMinorLocator(2)) 117 | ax.yaxis.set_minor_locator(AutoMinorLocator(2)) 118 | ax.tick_params(which="both", direction="in") 119 | ax.set_facecolor("oldlace") 120 | ax_res.set_facecolor("oldlace") 121 | ax.plot( 122 | first_model.spectrum.ramanshift, 123 | first_result.best_fit, 124 | label=first_model_name, 125 | lw=3, 126 | c="r", 127 | ) 128 | ax.plot( 129 | first_model.spectrum.ramanshift, 130 | first_result.data, 131 | label="Data", 132 | lw=3, 133 | c="grey", 134 | alpha=0.8, 135 | ) 136 | 137 | if plot_residuals: 138 | ax_res.plot( 139 | first_model.spectrum.ramanshift, 140 | first_result.residual, 141 | label="Residual", 142 | lw=3, 143 | c="k", 144 | alpha=0.8, 145 | ) 146 | 147 | for _component in first_components: # automatic color cycle 'cyan' ... 
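        # Each fitted peak component is evaluated separately, drawn as a dashed curve
        # and annotated near its fitted center position.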
148 | peak_name = _component.prefix.rstrip("_") 149 | ax.plot( 150 | first_model.spectrum.ramanshift, 151 | first_eval_comps[_component.prefix], 152 | ls="--", 153 | lw=4, 154 | label=peak_name, 155 | ) 156 | center_col = _component.prefix + "center" 157 | ax.annotate( 158 | f"{peak_name}:\n {first_result.best_values[center_col]:.0f}", 159 | xy=( 160 | first_result.best_values[center_col] * 0.97, 161 | 0.7 * first_result.params[_component.prefix + "height"].value, 162 | ), 163 | xycoords="data", 164 | ) 165 | 166 | 167 | def fit_plot_second( 168 | ax2nd, ax2nd_res, second_model: SpectrumFitModel, plot_residuals: bool = True 169 | ) -> None: 170 | if second_model: 171 | second_result = second_model.fit_result 172 | second_components = second_model.fit_result.components 173 | second_eval_comps = second_model.fit_result.eval_components() 174 | second_model_name = second_model.model.name 175 | else: 176 | second_components = [] 177 | second_result = None 178 | second_model_name = None 179 | second_eval_comps = None 180 | if second_model: 181 | ax2nd.grid(True) 182 | ax2nd_res.grid(True) 183 | ax2nd.xaxis.set_minor_locator(AutoMinorLocator(2)) 184 | ax2nd.yaxis.set_minor_locator(AutoMinorLocator(2)) 185 | ax2nd.tick_params(which="both", direction="in") 186 | ax2nd.set_facecolor("oldlace") 187 | ax2nd_res.set_facecolor("oldlace") 188 | if second_result is not None: 189 | ax2nd.plot( 190 | second_model.spectrum.ramanshift, 191 | second_result.best_fit, 192 | label=second_model_name, 193 | lw=3, 194 | c="r", 195 | ) 196 | ax2nd.plot( 197 | second_model.spectrum.ramanshift, 198 | second_result.data, 199 | label="Data", 200 | lw=3, 201 | c="grey", 202 | alpha=0.5, 203 | ) 204 | if plot_residuals: 205 | ax2nd_res.plot( 206 | second_model.spectrum.ramanshift, 207 | second_result.residual, 208 | label="Residual", 209 | lw=3, 210 | c="k", 211 | alpha=0.8, 212 | ) 213 | 214 | for _component in second_components: # automatic color cycle 'cyan' ... 
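            # Same per-component plotting as for the first-order fit; skipped when the
            # evaluated components are not available.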
215 | if second_eval_comps is None: 216 | continue 217 | 218 | peak_name = _component.prefix.rstrip("_") 219 | ax2nd.plot( 220 | second_model.spectrum.ramanshift, 221 | second_eval_comps[_component.prefix], 222 | ls="--", 223 | lw=4, 224 | label=peak_name, 225 | ) 226 | center_col = _component.prefix + "center" 227 | ax2nd.annotate( 228 | f"{peak_name}\n {second_result.best_values[center_col]:.0f}", 229 | xy=( 230 | second_result.best_values[center_col] * 0.97, 231 | 0.8 * second_result.params[_component.prefix + "height"].value, 232 | ), 233 | xycoords="data", 234 | ) 235 | ax2nd.set_ylim(-0.02, second_result.data.max() * 1.5) 236 | 237 | set_axes_labels_and_legend(ax2nd) 238 | 239 | 240 | def prepare_annotate_fit_report_second(ax2nd, second_result) -> Text: 241 | props = dict(boxstyle="round", facecolor="wheat", alpha=0.5) 242 | annotate_report_second = ax2nd.text( 243 | 1.01, 244 | 0.7, 245 | second_result.fit_report(min_correl=FIT_REPORT_MIN_CORREL), 246 | transform=ax2nd.transAxes, 247 | fontsize=11, 248 | verticalalignment="top", 249 | bbox=props, 250 | ) 251 | 252 | return annotate_report_second 253 | 254 | 255 | def prepare_annotate_fit_report_first(ax, first_result): 256 | fit_report = first_result.fit_report(min_correl=FIT_REPORT_MIN_CORREL) 257 | if len(fit_report) > -1: 258 | fit_report = fit_report.replace("prefix='D3_'", "prefix='D3_' \n") 259 | props = dict(boxstyle="round", facecolor="wheat", alpha=0.5) 260 | 261 | annotate_report_first = ax.text( 262 | 1.01, 263 | 1, 264 | fit_report, 265 | transform=ax.transAxes, 266 | fontsize=11, 267 | verticalalignment="top", 268 | bbox=props, 269 | ) 270 | return annotate_report_first 271 | 272 | 273 | def plot_special_si_components(ax, first_model): 274 | first_result = first_model.fit_result 275 | si_components = filter(lambda x: x.prefix.startswith("Si"), first_result.components) 276 | first_eval_comps = first_model.fit_result.eval_components() 277 | for si_comp in si_components: 278 | si_result = si_comp 279 | ax.plot( 280 | first_model.spectrum.ramanshift, 281 | first_eval_comps[si_comp.prefix], 282 | "b--", 283 | lw=4, 284 | label="Si_substrate", 285 | ) 286 | if si_result.params[si_comp.prefix + "fwhm"] > 1: 287 | ax.annotate( 288 | "Si_substrate:\n %.0f" % si_result.params["Si1_center"].value, 289 | xy=( 290 | si_result.params["Si1_center"].value * 0.97, 291 | 0.8 * si_result.params["Si1_height"].value, 292 | ), 293 | xycoords="data", 294 | ) 295 | 296 | 297 | def set_axes_labels_and_legend(ax: Axes): 298 | # set axes labels and legend 299 | ax.legend(loc=1) 300 | ax.set_xlabel("Raman shift (cm$^{-1}$)") 301 | ax.set_ylabel("normalized I / a.u.") 302 | -------------------------------------------------------------------------------- /src/raman_fitting/exports/plotting_raw_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jan 29 14:49:50 2020 5 | 6 | @author: DW 7 | """ 8 | 9 | from typing import Dict 10 | 11 | 12 | import matplotlib 13 | import matplotlib.pyplot as plt 14 | 15 | from raman_fitting.models.splitter import RegionNames 16 | from raman_fitting.config.path_settings import ( 17 | CLEAN_SPEC_REGION_NAME_PREFIX, 18 | ExportPathSettings, 19 | ) 20 | from raman_fitting.exports.plot_formatting import PLOT_REGION_AXES 21 | from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult 22 | 23 | from loguru import logger 24 | 25 | matplotlib.rcParams.update({"font.size": 14}) 26 | 27 | 28 | 
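# Plots every source spectrum per spectral region together with the aggregated mean
# spectrum and saves the figure as "<sample_id>_mean.png" in the plots export folder.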
def raw_data_spectra_plot( 29 | aggregated_spectra: Dict[RegionNames, AggregatedSampleSpectrumFitResult], 30 | export_paths: ExportPathSettings, 31 | ): # pragma: no cover 32 | if not aggregated_spectra: 33 | return 34 | # breakpoint() 35 | sources = list(aggregated_spectra.values())[0].aggregated_spectrum.sources 36 | sample_id = "-".join(set(i.file_info.sample.id for i in sources)) 37 | 38 | destfile = export_paths.plots.joinpath(f"{sample_id}_mean.png") 39 | destfile.parent.mkdir(exist_ok=True, parents=True) 40 | 41 | mean_fmt = dict(c="k", alpha=0.7, lw=3) 42 | sources_fmt = dict(alpha=0.4, lw=2) 43 | 44 | _, ax = plt.subplots(2, 3, figsize=(18, 12)) 45 | 46 | for spec_source in sources: 47 | for ( 48 | source_region_label, 49 | source_region, 50 | ) in spec_source.processed.clean_spectrum.spec_regions.items(): 51 | _source_region_name = source_region.region_name.split( 52 | CLEAN_SPEC_REGION_NAME_PREFIX 53 | )[-1] 54 | if _source_region_name not in PLOT_REGION_AXES: 55 | continue 56 | ax_ = ax[PLOT_REGION_AXES[_source_region_name]] 57 | ax_.plot( 58 | source_region.ramanshift, 59 | source_region.intensity, 60 | label=f"{spec_source.file_info.file.stem}", 61 | **sources_fmt, 62 | ) 63 | ax_.set_title(_source_region_name) 64 | if _source_region_name in aggregated_spectra: 65 | mean_spec = aggregated_spectra[ 66 | _source_region_name 67 | ].aggregated_spectrum.spectrum 68 | # plot the mean aggregated spectrum 69 | ax_.plot( 70 | mean_spec.ramanshift, 71 | mean_spec.intensity, 72 | label=mean_spec.label, 73 | **mean_fmt, 74 | ) 75 | 76 | if _source_region_name == RegionNames.full: 77 | ax_.legend(fontsize=10) 78 | 79 | plt.suptitle(f"Mean {sample_id}", fontsize=16) 80 | plt.savefig( 81 | destfile, 82 | dpi=300, 83 | bbox_inches="tight", 84 | ) 85 | plt.close() 86 | logger.debug(f"raw_data_spectra_plot saved:\n{destfile}") 87 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/imports/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/imports/collector.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Collection, Tuple 3 | import logging 4 | 5 | from .models import RamanFileInfo 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def collect_raman_file_infos( 11 | raman_files: Collection[Path], 12 | ) -> Tuple[List[RamanFileInfo], List[Path]]: 13 | pp_collection = [] 14 | _files = [] 15 | _failed_files = [] 16 | for file in raman_files: 17 | _files.append(file) 18 | try: 19 | pp_res = RamanFileInfo(**{"file": file}) 20 | pp_collection.append(pp_res) 21 | except Exception as exc: 22 | logger.warning( 23 | f"{__name__} collect_raman_file_infos unexpected error for calling RamanFileInfo on\n{file}.\n{exc}" 24 | ) 25 | _failed_files.append({"file": file, "error": exc}) 26 | if _failed_files: 27 | logger.warning( 28 | f"{__name__} collect_raman_file_infos failed for {len(_failed_files)}." 
29 | ) 30 | 31 | return pp_collection, _files 32 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/files/file_finder.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import logging 3 | from pathlib import Path 4 | from pydantic import BaseModel, DirectoryPath, Field, model_validator 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class FileFinder(BaseModel): 10 | directory: DirectoryPath 11 | suffixes: List[str] = Field([".txt"]) 12 | files: List[Path] = Field(None, init_var=False) 13 | 14 | @model_validator(mode="after") 15 | def parse_metadata_from_filepath(self) -> "FileFinder": 16 | if self.files is None: 17 | files = find_files(self.directory, self.suffixes) 18 | self.files = files 19 | 20 | return self 21 | 22 | 23 | def find_files(directory: Path, suffixes: List[str]) -> List[Path]: 24 | """ 25 | Creates a list of all Raman data files found in the dataset directory, which are used in the creation of the index. 26 | """ 27 | 28 | raman_files = [] 29 | 30 | for suffix in suffixes: 31 | files = list(directory.rglob(f"*{suffix}")) 32 | raman_files += files 33 | 34 | if not raman_files: 35 | logger.warning( 36 | f"find_files warning: the chosen data file dir was empty.\n{directory}\nPlease choose another directory which contains your data files." 37 | ) 38 | logger.info( 39 | f"find_files {len(raman_files)} files were found in the chosen data dir:\n\t{directory}" 40 | ) 41 | return raman_files 42 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/files/file_indexer.py: -------------------------------------------------------------------------------- 1 | """Indexer for raman data files""" 2 | 3 | from itertools import filterfalse, groupby 4 | from pathlib import Path 5 | from typing import List, Sequence, TypeAlias 6 | 7 | from loguru import logger 8 | from pydantic import ( 9 | BaseModel, 10 | ConfigDict, 11 | Field, 12 | FilePath, 13 | NewPath, 14 | model_validator, 15 | ) 16 | from raman_fitting.config import settings 17 | from raman_fitting.imports.collector import collect_raman_file_infos 18 | from raman_fitting.imports.files.utils import ( 19 | load_dataset_from_file, 20 | write_dataset_to_file, 21 | ) 22 | from raman_fitting.imports.models import RamanFileInfo 23 | from tablib import Dataset 24 | 25 | from raman_fitting.imports.spectrum import SPECTRUM_FILETYPE_PARSERS 26 | 27 | RamanFileInfoSet: TypeAlias = Sequence[RamanFileInfo] 28 | 29 | 30 | class RamanFileIndex(BaseModel): 31 | model_config = ConfigDict(arbitrary_types_allowed=True) 32 | 33 | index_file: NewPath | FilePath | None = Field(None, validate_default=False) 34 | raman_files: RamanFileInfoSet | None = Field(None) 35 | dataset: Dataset | None = Field(None) 36 | force_reindex: bool = Field(False, validate_default=False) 37 | persist_to_file: bool = Field(True, validate_default=False) 38 | 39 | @model_validator(mode="after") 40 | def read_or_load_data(self) -> "RamanFileIndex": 41 | if not any([self.index_file, self.raman_files, self.dataset]): 42 | raise ValueError("At least one of index_file, raman_files or dataset must be provided.") 43 | 44 | reload_from_file = validate_reload_from_index_file( 45 | self.index_file, self.force_reindex 46 | ) 47 | if reload_from_file: 48 | self.dataset = load_dataset_from_file(self.index_file) 49 | if not self.raman_files and self.dataset: 50 | self.raman_files = parse_dataset_to_index(self.dataset) 51 | return self 52 | 53 | if 
self.raman_files is not None: 54 | dataset_rf = cast_raman_files_to_dataset(self.raman_files) 55 | if self.dataset is not None: 56 | assert ( 57 | dataset_rf == self.dataset 58 | ), "Both dataset and raman_files provided and they are different." 59 | self.dataset = dataset_rf 60 | 61 | if self.dataset is not None: 62 | self.raman_files = parse_dataset_to_index(self.dataset) 63 | 64 | if self.raman_files is None and self.dataset is None: 65 | raise ValueError( 66 | "Index error, both raman_files and dataset are not provided." 67 | ) 68 | 69 | if self.persist_to_file and self.index_file is not None: 70 | write_dataset_to_file(self.index_file, self.dataset) 71 | 72 | return self 73 | 74 | 75 | def validate_reload_from_index_file( 76 | index_file: Path | None, force_reindex: bool 77 | ) -> bool: 78 | if index_file is None: 79 | logger.debug( 80 | "Index file not provided, index will not be reloaded or persisted." 81 | ) 82 | return False 83 | if index_file.exists() and not force_reindex: 84 | return True 85 | elif force_reindex: 86 | logger.warning( 87 | f"Index index_file file {index_file} exists and will be overwritten." 88 | ) 89 | else: 90 | logger.info( 91 | "Index index_file file does not exists but was asked to reload from it." 92 | ) 93 | return False 94 | 95 | 96 | def cast_raman_files_to_dataset(raman_files: RamanFileInfoSet) -> Dataset: 97 | headers = list(RamanFileInfo.model_fields.keys()) 98 | data = Dataset(headers=headers) 99 | for file in raman_files: 100 | data.append(file.model_dump(mode="json").values()) 101 | return data 102 | 103 | 104 | def parse_dataset_to_index(dataset: Dataset) -> RamanFileInfoSet: 105 | raman_files = [] 106 | for row in dataset: 107 | row_data = dict(zip(dataset.headers, row)) 108 | raman_files.append(RamanFileInfo(**row_data)) 109 | return raman_files 110 | 111 | 112 | class IndexSelector(BaseModel): 113 | raman_files: Sequence[RamanFileInfo] 114 | sample_ids: List[str] = Field(default_factory=list) 115 | sample_groups: List[str] = Field(default_factory=list) 116 | selection: Sequence[RamanFileInfo] = Field(default_factory=list) 117 | 118 | @model_validator(mode="after") 119 | def make_and_set_selection(self) -> "IndexSelector": 120 | rf_index = self.raman_files 121 | if not any([self.sample_groups, self.sample_ids]): 122 | self.selection = rf_index 123 | logger.debug( 124 | f"{self.__class__.__qualname__} selected {len(self.selection)} of {len(rf_index)}. " 125 | ) 126 | return self 127 | else: 128 | rf_index_groups = list( 129 | filter(lambda x: x.sample.group in self.sample_groups, rf_index) 130 | ) 131 | _pre_selected_samples = {i.sample.id for i in rf_index_groups} 132 | selected_sample_ids = filterfalse( 133 | lambda x: x in _pre_selected_samples, self.sample_ids 134 | ) 135 | rf_index_samples = list( 136 | filter(lambda x: x.sample.id in selected_sample_ids, rf_index) 137 | ) 138 | rf_selection_index = rf_index_groups + rf_index_samples 139 | self.selection = rf_selection_index 140 | logger.debug( 141 | f"{self.__class__.__qualname__} selected {len(self.selection)} of {rf_index}. 
" 142 | ) 143 | return self 144 | 145 | 146 | def groupby_sample_group(index: RamanFileInfoSet): 147 | """Generator for Sample Groups, yields the name of group and group of the index SampleGroup""" 148 | grouper = groupby(index, key=lambda x: x.sample.group) 149 | return grouper 150 | 151 | 152 | def groupby_sample_id(index: RamanFileInfoSet): 153 | """Generator for SampleIDs, yields the name of group, name of SampleID and group of the index of the SampleID""" 154 | grouper = groupby(index, key=lambda x: x.sample.id) 155 | return grouper 156 | 157 | 158 | def iterate_over_groups_and_sample_id(index: RamanFileInfoSet): 159 | for grp_name, grp in groupby_sample_group(index): 160 | for sample_id, sgrp in groupby_sample_group(grp): 161 | yield grp_name, grp, sample_id, sgrp 162 | 163 | 164 | def select_index_by_sample_groups(index: RamanFileInfoSet, sample_groups: List[str]): 165 | return filter(lambda x: x.sample.group in sample_groups, index) 166 | 167 | 168 | def select_index_by_sample_ids(index: RamanFileInfoSet, sample_ids: List[str]): 169 | return filter(lambda x: x.sample.id in sample_ids, index) 170 | 171 | 172 | def select_index( 173 | index: RamanFileInfoSet, sample_groups: List[str], sample_ids: List[str] 174 | ): 175 | group_selection = list(select_index_by_sample_groups(index, sample_groups)) 176 | sample_selection = list(select_index_by_sample_ids(index, sample_ids)) 177 | selection = group_selection + sample_selection 178 | return selection 179 | 180 | 181 | def collect_raman_file_index_info( 182 | raman_files: Sequence[Path] | None = None, **kwargs 183 | ) -> RamanFileInfoSet: 184 | """loops over the files and scrapes the index data from each file""" 185 | raman_files = list(raman_files) 186 | total_files = [] 187 | dirs = [i for i in raman_files if i.is_dir()] 188 | files = [i for i in raman_files if i.is_file()] 189 | total_files += files 190 | suffixes = [i.lstrip(".") for i in SPECTRUM_FILETYPE_PARSERS.keys()] 191 | for d1 in dirs: 192 | paths = [path for i in suffixes for path in d1.glob(f"*.{i}")] 193 | total_files += paths 194 | index, files = collect_raman_file_infos(total_files, **kwargs) 195 | logger.info(f"successfully made index {len(index)} from {len(files)} files") 196 | return index 197 | 198 | 199 | def initialize_index_from_source_files( 200 | files: Sequence[Path] | None = None, 201 | index_file: Path | None = None, 202 | force_reindex: bool = False, 203 | ) -> RamanFileIndex: 204 | raman_files = collect_raman_file_index_info(raman_files=files) 205 | # breakpoint() 206 | raman_index = RamanFileIndex( 207 | index_file=index_file, raman_files=raman_files, force_reindex=force_reindex 208 | ) 209 | logger.info( 210 | f"index_delegator index prepared with len {len(raman_index.raman_files)}" 211 | ) 212 | return raman_index 213 | 214 | 215 | def main(): 216 | """test run for indexer""" 217 | index_file = settings.destination_dir.joinpath("index.csv") 218 | raman_files = collect_raman_file_index_info() 219 | try: 220 | index_data = {"file": index_file, "raman_files": raman_files} 221 | raman_index = RamanFileIndex(**index_data) 222 | logger.debug(f"Raman Index len: {len(raman_index.dataset)}") 223 | select_index(raman_index.raman_files, sample_groups=["DW"], sample_ids=["DW38"]) 224 | except Exception as e: 225 | logger.error(f"Raman Index error: {e}") 226 | raman_index = None 227 | 228 | return raman_index 229 | 230 | 231 | if __name__ == "__main__": 232 | main() 233 | -------------------------------------------------------------------------------- 
/src/raman_fitting/imports/files/index_funcs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pathlib import Path 4 | 5 | from raman_fitting.imports.spectrum.datafile_parsers import load_dataset_from_file 6 | 7 | from loguru import logger 8 | 9 | 10 | def get_dtypes_filepath(index_file): 11 | _dtypes_filepath = index_file.with_name( 12 | index_file.stem + "_dtypes" + index_file.suffix 13 | ) 14 | return _dtypes_filepath 15 | 16 | 17 | def export_index(index, index_file): 18 | """saves the index to a defined Index file""" 19 | if index.empty: 20 | logger.info(f"{__name__} Empty index not exported") 21 | return 22 | 23 | if not index_file.parent.exists(): 24 | logger.info(f"{__name__} created parent dir: {index_file.parent}") 25 | index_file.parent.mkdir(exist_ok=True, parents=True) 26 | 27 | index.to_csv(index_file) 28 | 29 | _dtypes = index.dtypes.to_frame("dtypes") 30 | _dtypes.to_csv(get_dtypes_filepath(index_file)) 31 | 32 | logger.info( 33 | f"{__name__} Succesfully Exported Raman Index file to:\n\t{index_file}\nwith len({len(index)})." 34 | ) 35 | 36 | 37 | def load_index(index_file): 38 | """loads the index from from defined Index file""" 39 | if not index_file.exists(): 40 | logger.error( 41 | f"Error in load_index: {index_file} does not exists, starting reload index ... " 42 | ) 43 | return 44 | 45 | try: 46 | index = load_dataset_from_file(index_file) 47 | 48 | logger.info( 49 | f"Succesfully imported Raman Index file from {index_file}, with len({len(index)})" 50 | ) 51 | if len(index) != len(index): 52 | logger.error( 53 | f"""'Error in load_index from {index_file}, 54 | \nlength of loaded index not same as number of raman files 55 | \n starting reload index ... """ 56 | ) 57 | 58 | except Exception as e: 59 | logger.error( 60 | f"Error in load_index from {index_file},\n{e}\n starting reload index ... 
" 61 | ) 62 | 63 | 64 | def index_selection(index, **kwargs): 65 | """ 66 | Special selector on the index DataFrame 67 | 68 | Parameters 69 | ------- 70 | 71 | index 72 | pd.DataFrame containing the index of files 73 | should contains columns that are given in index_file_sample_cols and index_file_stat_cols 74 | default_selection str 75 | all or '' for empty default 76 | kwargs 77 | checks for keywords suchs as samplegroups, sampleIDs, extra 78 | meant for cli commands 79 | 80 | Returns 81 | ------- 82 | index_selection 83 | pd.DataFrame with a selection from the given input parameter index 84 | default returns empty DataFrame 85 | 86 | """ 87 | if index is None: 88 | return 89 | 90 | if not kwargs: 91 | return index 92 | 93 | default_selection = kwargs.get("default_selection", "all") 94 | if "normal" not in kwargs.get("run_mode", default_selection): 95 | default_selection = "all" 96 | index_selection = None 97 | logger.info( 98 | f"starting index selection from index({len(index)}) with:\n default selection: {default_selection}\n and {kwargs}" 99 | ) 100 | 101 | if not index: 102 | logger.warning("index selection index arg empty") 103 | return 104 | 105 | if default_selection == "all": 106 | index_selection = index.copy() 107 | 108 | if "samplegroups" in kwargs: 109 | index = list( 110 | filter(lambda x: x.sample.group in kwargs.get("samplegroups", []), index) 111 | ) 112 | if "sampleIDs" in kwargs: 113 | index = list( 114 | filter(lambda x: x.sample.id in kwargs.get("sampleIDs", []), index) 115 | ) 116 | 117 | if "extra" in kwargs: 118 | runq = kwargs.get("run") 119 | if "recent" in runq: 120 | grp = index.sort_values( 121 | "FileCreationDate", ascending=False 122 | ).FileCreationDate.unique()[0] 123 | 124 | index_selection = index.loc[index.FileCreationDate == grp] 125 | index_selection = index_selection.assign( 126 | **{ 127 | "DestDir": [ 128 | Path(i).joinpath(grp.strftime("%Y-%m-%d")) 129 | for i in index_selection.DestDir.values 130 | ] 131 | } 132 | ) 133 | 134 | logger.debug( 135 | f"finished index selection from index({len(index)}) with:\n {default_selection}\n and {kwargs}\n selection len({len(index_selection )})" 136 | ) 137 | 138 | if not index_selection: 139 | logger.warning("index selection empty. exiting") 140 | sys.exit() 141 | 142 | return index_selection 143 | 144 | 145 | def test_positions(sample_group_files): 146 | if not sample_group_files: 147 | return 148 | 149 | _files = [i.file for i in sample_group_files] 150 | _positions = [i.sample.position for i in sample_group_files] 151 | if len(set(_files)) != len(set(_positions)): 152 | logger.warning( 153 | f"{sample_group_files[0].sample} Unique files and positions not matching for {sample_group_files}" 154 | ) 155 | return sample_group_files 156 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/files/index_helpers.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from pathlib import Path 3 | 4 | 5 | def get_filename_id_from_path(path: Path) -> str: 6 | """ 7 | Makes the ID from a filepath 8 | 9 | Parameters 10 | ---------- 11 | path : Path 12 | DESCRIPTION. 
13 | 14 | Returns 15 | ------- 16 | str: which contains hash(parent+suffix)_stem of path 17 | 18 | """ 19 | 20 | _parent_suffix_hash = hashlib.sha512( 21 | (str(path.parent) + path.suffix).encode("utf-8") 22 | ).hexdigest() 23 | filename_id = f"{_parent_suffix_hash}_{path.stem}" 24 | return filename_id 25 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/files/metadata.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Dict 3 | from datetime import date 4 | import datetime 5 | from typing import Any 6 | 7 | 8 | from pydantic import ( 9 | BaseModel, 10 | FilePath, 11 | PastDatetime, 12 | ) 13 | 14 | 15 | class FileMetaData(BaseModel): 16 | file: FilePath 17 | creation_date: date 18 | creation_datetime: PastDatetime 19 | modification_date: date 20 | modification_datetime: PastDatetime 21 | size: int 22 | 23 | 24 | def get_file_metadata(filepath: Path) -> Dict[str, Any]: 25 | """converting creation time and last mod time to datetime object""" 26 | fstat = filepath.stat() 27 | c_t = fstat.st_ctime 28 | m_t = fstat.st_mtime 29 | c_tdate, m_tdate = c_t, m_t 30 | 31 | try: 32 | c_t = datetime.datetime.fromtimestamp(fstat.st_ctime) 33 | m_t = datetime.datetime.fromtimestamp(fstat.st_mtime) 34 | c_tdate = c_t.date() 35 | m_tdate = m_t.date() 36 | except OverflowError: 37 | pass 38 | except OSError: 39 | pass 40 | ret = { 41 | "file": filepath, 42 | "creation_date": c_tdate, 43 | "creation_datetime": c_t, 44 | "modification_date": m_tdate, 45 | "modification_datetime": m_t, 46 | "size": fstat.st_size, 47 | } 48 | return ret 49 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/files/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import tablib.exceptions 4 | from tablib import Dataset 5 | 6 | from loguru import logger 7 | 8 | 9 | def write_dataset_to_file(file: Path, dataset: Dataset) -> None: 10 | if file.suffix == ".csv": 11 | with open(file, "w", newline="") as f: 12 | f.write(dataset.export("csv")) 13 | else: 14 | with open(file, "wb") as f: 15 | f.write(dataset.export(file.suffix.lstrip("."))) 16 | logger.debug(f"Wrote dataset {len(dataset)} to {file}") 17 | 18 | 19 | def load_dataset_from_file(file) -> Dataset: 20 | with open(file, "r", encoding="utf-8") as fh: 21 | try: 22 | imported_data = Dataset().load(fh) 23 | except tablib.exceptions.UnsupportedFormat as e: 24 | logger.warning(f"Could not read dataset from {file}: {e}") 25 | imported_data = Dataset() 26 | 27 | logger.debug(f"Read dataset {len(imported_data)} from {file}") 28 | return imported_data 29 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/files/validators.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def validate_filepath(filepath: Path, max_bytesize=10**6) -> Path | None: 8 | if not isinstance(filepath, (Path, str)): 9 | raise TypeError("Argument given is not Path nor str") 10 | 11 | filepath = Path(filepath) 12 | 13 | if not filepath.exists(): 14 | logger.warning("File does not exist") 15 | return 16 | 17 | filesize = filepath.stat().st_size 18 | if filesize > max_bytesize: 19 | logger.warning(f"File too large ({filesize})=> skipped") 20 | return 21 
| return filepath 22 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/models.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pydantic import ( 3 | BaseModel, 4 | FilePath, 5 | model_validator, 6 | Field, 7 | ConfigDict, 8 | ) 9 | 10 | from .samples.sample_id_helpers import extract_sample_metadata_from_filepath 11 | 12 | from .files.metadata import FileMetaData, get_file_metadata 13 | from .files.index_helpers import get_filename_id_from_path 14 | from .samples.models import SampleMetaData 15 | 16 | 17 | class RamanFileInfo(BaseModel): 18 | model_config = ConfigDict(arbitrary_types_allowed=True) 19 | 20 | file: FilePath 21 | filename_id: str = Field(None, init_var=False, validate_default=False) 22 | sample: SampleMetaData | str = Field(None, init_var=False, validate_default=False) 23 | file_metadata: FileMetaData | str = Field( 24 | None, init_var=False, validate_default=False 25 | ) 26 | 27 | @model_validator(mode="after") 28 | def set_filename_id(self) -> "RamanFileInfo": 29 | filename_id = get_filename_id_from_path(self.file) 30 | self.filename_id = filename_id 31 | return self 32 | 33 | @model_validator(mode="after") 34 | def parse_and_set_sample_from_file(self) -> "RamanFileInfo": 35 | sample = extract_sample_metadata_from_filepath(self.file) 36 | self.sample = sample 37 | return self 38 | 39 | @model_validator(mode="after") 40 | def parse_and_set_metadata_from_filepath(self) -> "RamanFileInfo": 41 | file_metadata = get_file_metadata(self.file) 42 | self.file_metadata = FileMetaData(**file_metadata) 43 | return self 44 | 45 | @model_validator(mode="after") 46 | def initialize_sample_and_file_from_dict(self) -> "RamanFileInfo": 47 | if isinstance(self.sample, dict): 48 | self.sample = SampleMetaData(**self.sample) 49 | elif isinstance(self.sample, str): 50 | _sample = json.loads(self.sample.replace("'", '"')) 51 | self.sample = SampleMetaData(**_sample) 52 | 53 | if isinstance(self.file_metadata, dict): 54 | self.file_metadata = FileMetaData(**self.file_metadata) 55 | elif isinstance(self.file_metadata, str): 56 | _file_metadata = json.loads(self.file_metadata.replace("'", '"')) 57 | self.file_metadata = FileMetaData(**_file_metadata) 58 | 59 | return self 60 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/samples/models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class SampleMetaData(BaseModel): 5 | id: str 6 | group: str 7 | position: int = 0 8 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/samples/sample_id_helpers.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Optional, Dict 2 | from pathlib import Path 3 | 4 | from .models import SampleMetaData 5 | 6 | 7 | def parse_string_to_sample_id_and_position( 8 | string: str, seps=("_", " ", "-") 9 | ) -> Tuple[str, int]: 10 | """ 11 | Parser for the filenames -> finds SampleID and sample position 12 | 13 | Parameters 14 | ---------- 15 | string : str 16 | The filename string which is parsed 17 | seps : tuple of str default 18 | ordered collection of separators tried for split 19 | default : ('_', ' ', '-') 20 | 21 | Returns 22 | ------- 23 | tuple 24 | (sample_id, position) parsed from the string. 
25 | """ 26 | 27 | split = None 28 | first_sep_match_index = min( 29 | [n for n, i in enumerate(seps) if i in string], default=None 30 | ) 31 | first_sep_match = ( 32 | seps[first_sep_match_index] if first_sep_match_index is not None else None 33 | ) 34 | split = string.split(first_sep_match) 35 | _lensplit = len(split) 36 | 37 | if _lensplit == 0: 38 | sample_id, position = split[0], 0 39 | elif len(split) == 1: 40 | sample_id, position = split[0], 0 41 | elif len(split) == 2: 42 | sample_id = split[0] 43 | _pos_strnum = "".join(i for i in split[1] if i.isnumeric()) 44 | if _pos_strnum: 45 | position = int(_pos_strnum) 46 | else: 47 | position = split[1] 48 | elif len(split) >= 3: 49 | sample_id = "_".join(split[0:-1]) 50 | position = int("".join(filter(str.isdigit, split[-1]))) 51 | position = position or 0 52 | return (sample_id, position) 53 | 54 | 55 | def extract_sample_group_from_sample_id(sample_id: str, max_len=4) -> str: 56 | """adding the extra sample Group key from sample ID""" 57 | 58 | _len = len(sample_id) 59 | _maxalphakey = min( 60 | [n for n, i in enumerate(sample_id) if not str(i).isalpha()], default=_len 61 | ) 62 | _maxkey = min((_len, _maxalphakey, max_len)) 63 | sample_group_id = "".join([i for i in sample_id[0:_maxkey] if i.isalpha()]) 64 | return sample_group_id 65 | 66 | 67 | def overwrite_sample_id_from_mapper(sample_id: str, mapper: dict) -> str: 68 | """Takes an sample_id and potentially overwrites from a mapper dict""" 69 | sample_id_map = mapper.get(sample_id) 70 | if sample_id_map is not None: 71 | return sample_id_map 72 | return sample_id 73 | 74 | 75 | def overwrite_sample_group_id_from_parts( 76 | parts: List[str], sample_group_id: str, mapper: dict 77 | ) -> str: 78 | for k, val in mapper.items(): 79 | if k in parts: 80 | sample_group_id = val 81 | return sample_group_id 82 | 83 | 84 | def extract_sample_metadata_from_filepath( 85 | filepath: Path, sample_name_mapper: Optional[Dict[str, Dict[str, str]]] = None 86 | ) -> SampleMetaData: 87 | """parse the sample_id, position and sgrpID from stem""" 88 | stem = filepath.stem 89 | parts = filepath.parts 90 | 91 | sample_id, position = parse_string_to_sample_id_and_position(stem) 92 | 93 | if sample_name_mapper is not None: 94 | sample_id_mapper = sample_name_mapper.get("sample_id", {}) 95 | sample_id = overwrite_sample_id_from_mapper(sample_id, sample_id_mapper) 96 | sample_group_id = extract_sample_group_from_sample_id(sample_id) 97 | 98 | if sample_name_mapper is not None: 99 | sample_grp_mapper = sample_name_mapper.get("sample_group_id", {}) 100 | sample_group_id = overwrite_sample_group_id_from_parts( 101 | parts, sample_group_id, sample_grp_mapper 102 | ) 103 | 104 | sample = SampleMetaData( 105 | **{"id": sample_id, "group": sample_group_id, "position": position} 106 | ) 107 | return sample 108 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/spectrum/__init__.py: -------------------------------------------------------------------------------- 1 | from .datafile_parsers import read_file_with_tablib 2 | 3 | SPECTRUM_FILETYPE_PARSERS = { 4 | ".txt": { 5 | "method": read_file_with_tablib, # load_spectrum_from_txt, 6 | }, 7 | ".xlsx": { 8 | "method": read_file_with_tablib, # pd.read_excel, 9 | }, 10 | ".csv": { 11 | "method": read_file_with_tablib, # pd.read_csv, 12 | "kwargs": {}, 13 | }, 14 | ".json": { 15 | "method": read_file_with_tablib, 16 | }, 17 | } 18 | -------------------------------------------------------------------------------- 
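The SPECTRUM_FILETYPE_PARSERS mapping above is keyed on the file suffix; the actual lookup is done by get_file_parser in spectrumdata_parser.py further below. A minimal sketch of how such a registry is resolved, using the mapping defined above (illustrative helper, not part of the package):

from functools import partial
from pathlib import Path

def resolve_parser(filepath: Path):
    # pick the parser callable registered for this suffix; unsupported suffixes raise KeyError
    entry = SPECTRUM_FILETYPE_PARSERS[filepath.suffix]
    return partial(entry["method"], **entry.get("kwargs", {}))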
/src/raman_fitting/imports/spectrum/datafile_parsers.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | from tablib import Dataset 6 | 7 | from loguru import logger 8 | 9 | 10 | def filter_data_for_numeric(data: Dataset): 11 | filtered_data = Dataset() 12 | filtered_data.headers = data.headers 13 | 14 | for row in data: 15 | try: 16 | digits_row = tuple(map(float, row)) 17 | except ValueError: 18 | continue 19 | except TypeError: 20 | continue 21 | 22 | if not any(i is None for i in digits_row): 23 | filtered_data.append(digits_row) 24 | return filtered_data 25 | 26 | 27 | def load_dataset_from_file(filepath, **kwargs) -> Dataset: 28 | with open(filepath, "r") as fh: 29 | imported_data = Dataset(**kwargs).load(fh) 30 | return imported_data 31 | 32 | 33 | def check_header_keys(dataset: Dataset, header_keys: Sequence[str]): 34 | if not set(header_keys).issubset(set(dataset.headers)): 35 | first_row = list(dataset.headers) 36 | dataset.insert(0, first_row) 37 | dataset.headers = header_keys 38 | return dataset 39 | 40 | 41 | def read_file_with_tablib( 42 | filepath: Path, header_keys: Sequence[str], sort_by=None 43 | ) -> Dataset: 44 | data = load_dataset_from_file(filepath) 45 | data = check_header_keys(data, header_keys) 46 | numeric_data = filter_data_for_numeric(data) 47 | sort_by = header_keys[0] if sort_by is None else sort_by 48 | sorted_data = numeric_data.sort(sort_by) 49 | return sorted_data 50 | 51 | 52 | def read_text(filepath, max_bytes=10**6, encoding="utf-8", errors=None): 53 | """additional read text method for raw text data inspection""" 54 | _text = "read_text_method" 55 | filesize = filepath.stat().st_size 56 | if filesize < max_bytes: 57 | try: 58 | _text = filepath.read_text(encoding=encoding, errors=errors) 59 | # _text.splitlines() 60 | except Exception as exc: 61 | # IDEA specify which Exceptions are expected 62 | _text += "\nread_error" 63 | logger.warning(f"file read text error => skipped.\n{exc}") 64 | else: 65 | _text += "\nfile_too_large" 66 | logger.warning(f" file too large ({filesize})=> skipped") 67 | 68 | return _text 69 | 70 | 71 | def use_np_loadtxt(filepath, usecols=(0, 1), **kwargs) -> np.ndarray: 72 | array = np.array([]) 73 | try: 74 | array = np.loadtxt(filepath, usecols=usecols, **kwargs) 75 | except IndexError: 76 | logger.debug(f"IndexError called np genfromtxt for {filepath}") 77 | array = np.genfromtxt(filepath, invalid_raise=False) 78 | except ValueError: 79 | logger.debug(f"ValueError called np genfromtxt for {filepath}") 80 | array = np.genfromtxt(filepath, invalid_raise=False) 81 | except Exception as exc: 82 | _msg = f"Can not load data from txt file: {filepath}\n{exc}" 83 | logger.error(_msg) 84 | raise ValueError(_msg) from exc 85 | return array 86 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/spectrum/spectra_collection.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | 5 | from pydantic import BaseModel, model_validator 6 | 7 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames 8 | from raman_fitting.models.spectrum import SpectrumData 9 | 10 | 11 | class SpectraDataCollection(BaseModel): 12 | spectra: List[SpectrumData] 13 | region_name: RegionNames 14 | mean_spectrum: SpectrumData | None = None 15 | 16 |
@model_validator(mode="after") 17 | def check_spectra_have_same_label(self) -> "SpectraDataCollection": 18 | """checks that all spectra in the collection have the same label""" 19 | labels = set(i.label for i in self.spectra) 20 | if len(labels) > 1: 21 | raise ValueError(f"Spectra have different labels {labels}") 22 | return self 23 | 24 | @model_validator(mode="after") 25 | def check_spectra_have_same_region(self) -> "SpectraDataCollection": 26 | """checks that all spectra in the collection have the same region_name""" 27 | region_names = set(i.region_name for i in self.spectra) 28 | if len(region_names) > 1: 29 | raise ValueError(f"Spectra have different region_names {region_names}") 30 | return self 31 | 32 | @model_validator(mode="after") 33 | def check_spectra_lengths(self) -> "SpectraDataCollection": 34 | unique_lengths_rs = set(len(i.ramanshift) for i in self.spectra) 35 | unique_lengths_int = set(len(i.intensity) for i in self.spectra) 36 | if len(unique_lengths_rs) > 1: 37 | raise ValueError( 38 | f"The spectra have different ramanshift lengths where they should be the same.\n\t{unique_lengths_rs}" 39 | ) 40 | if len(unique_lengths_int) > 1: 41 | raise ValueError( 42 | f"The spectra have different intensity lengths where they should be the same.\n\t{unique_lengths_int}" 43 | ) 44 | 45 | return self 46 | 47 | @model_validator(mode="after") 48 | def set_mean_spectrum(self) -> "SpectraDataCollection": 49 | # wrap this in a ProcessedSpectraCollection model 50 | mean_int = np.mean(np.vstack([i.intensity for i in self.spectra]), axis=0) 51 | mean_ramanshift = np.mean( 52 | np.vstack([i.ramanshift for i in self.spectra]), axis=0 53 | ) 54 | source_files = list(set(i.source for i in self.spectra)) 55 | _label = "".join(map(str, set(i.label for i in self.spectra))) 56 | mean_spec = SpectrumData( 57 | ramanshift=mean_ramanshift, 58 | intensity=mean_int, 59 | label=f"clean_{self.region_name}_mean", 60 | region_name=self.region_name, 61 | source=source_files, 62 | ) 63 | self.mean_spectrum = mean_spec 64 | return self 65 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/spectrum/validators.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import logging 3 | 4 | import pandas as pd 5 | import numpy as np 6 | from tablib import Dataset 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @dataclass 12 | class ValidateSpectrumValues: 13 | spectrum_key: str 14 | min: float 15 | max: float 16 | len: int 17 | 18 | def validate_min(self, spectrum_data: pd.DataFrame): 19 | data_min = min(spectrum_data[self.spectrum_key]) 20 | return np.isclose(data_min, self.min, rtol=0.2) 21 | 22 | def validate_max(self, spectrum_data: pd.DataFrame): 23 | data_max = max(spectrum_data[self.spectrum_key]) 24 | return data_max <= self.max 25 | 26 | def validate_len(self, spectrum_data: pd.DataFrame): 27 | data_len = len(spectrum_data) 28 | return np.isclose(data_len, self.len, rtol=0.1) 29 | 30 | def validate(self, spectrum_data: pd.DataFrame): 31 | ret = [] 32 | for _func in [self.validate_min, self.validate_max, self.validate_len]: 33 | ret.append(_func(spectrum_data)) 34 | return all(ret) 35 | 36 | 37 | def validate_spectrum_keys_expected_values( 38 | spectrum_data: Dataset, expected_values: ValidateSpectrumValues 39 | ): 40 | if expected_values.spectrum_key not in spectrum_data.columns: 41 | logger.error( 42 | f"The expected value type {expected_values.spectrum_key} is not in the columns {spectrum_data.columns}" 43 | ) 44 | if spectrum_data.empty: 45 |
logger.error("Spectrum data is empty") 46 | return 47 | 48 | validation = expected_values.validate(spectrum_data) 49 | 50 | if not validation: 51 | logger.warning( 52 | f"The {expected_values.spectrum_key} of this spectrum does not match the expected values {expected_values}" 53 | ) 54 | -------------------------------------------------------------------------------- /src/raman_fitting/imports/spectrumdata_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Mon Jul 5 21:09:06 2021 3 | 4 | @author: DW 5 | """ 6 | 7 | from dataclasses import dataclass, field 8 | import hashlib 9 | 10 | from pathlib import Path 11 | from functools import partial 12 | 13 | from typing import Callable 14 | 15 | from tablib import Dataset 16 | 17 | from .spectrum.validators import ValidateSpectrumValues 18 | from .files.validators import validate_filepath 19 | from .spectrum import SPECTRUM_FILETYPE_PARSERS 20 | 21 | from raman_fitting.models.spectrum import SpectrumData 22 | 23 | from loguru import logger 24 | 25 | 26 | spectrum_data_keys = ("ramanshift", "intensity") 27 | 28 | ramanshift_expected_values = ValidateSpectrumValues( 29 | spectrum_key="ramanshift", min=-95, max=3650, len=1600 30 | ) 31 | intensity_expected_values = ValidateSpectrumValues( 32 | spectrum_key="intensity", min=0, max=1e4, len=1600 33 | ) 34 | 35 | spectrum_keys_expected_values = { 36 | "ramanshift": ramanshift_expected_values, 37 | "intensity": intensity_expected_values, 38 | } 39 | 40 | 41 | def get_file_parser(filepath: Path) -> Callable[[Path], Dataset]: 42 | "Get callable file parser function." 43 | suffix = filepath.suffix 44 | parser = SPECTRUM_FILETYPE_PARSERS[suffix]["method"] 45 | kwargs = SPECTRUM_FILETYPE_PARSERS[suffix].get("kwargs", {}) 46 | return partial(parser, **kwargs) 47 | 48 | 49 | @dataclass 50 | class SpectrumReader: 51 | """ 52 | Reads a spectrum from a 'raw' data file Path or str 53 | 54 | with spectrum_data_keys "ramanshift" and "intensity". 55 | Double checks the values 56 | Sets a hash attribute afterwards 57 | """ 58 | 59 | filepath: Path | str 60 | spectrum_data_keys: tuple = field(default=spectrum_data_keys, repr=False) 61 | 62 | spectrum: SpectrumData = field(default=None) 63 | label: str = "raw" 64 | region_name: str = "full" 65 | spectrum_hash: str = field(default=None, repr=False) 66 | spectrum_length: int = field(default=0, init=False) 67 | 68 | def __post_init__(self): 69 | super().__init__() 70 | 71 | self.filepath = validate_filepath(self.filepath) 72 | self.spectrum_length = 0 73 | 74 | if self.filepath is None: 75 | raise ValueError(f"File is not valid. {self.filepath}") 76 | parser = get_file_parser(self.filepath) 77 | parsed_spectrum = parser(self.filepath, self.spectrum_data_keys) 78 | if parsed_spectrum is None: 79 | return 80 | for spectrum_key in parsed_spectrum.headers: 81 | if spectrum_key not in spectrum_keys_expected_values: 82 | continue 83 | validator = spectrum_keys_expected_values[spectrum_key] 84 | valid = validator.validate(parsed_spectrum) 85 | if not valid: 86 | logger.warning( 87 | f"The values of {spectrum_key} of this spectrum are invalid. 
{validator}" 88 | ) 89 | spec_init = { 90 | "label": self.label, 91 | "region_name": self.region_name, 92 | "source": self.filepath, 93 | } 94 | _parsed_spec_dict = { 95 | k: parsed_spectrum[k] for k in spectrum_keys_expected_values.keys() 96 | } 97 | spec_init.update(_parsed_spec_dict) 98 | self.spectrum = SpectrumData(**spec_init) 99 | 100 | self.spectrum_hash = self.get_hash_text(self.spectrum) 101 | self.spectrum_length = len(self.spectrum) 102 | 103 | @staticmethod 104 | def get_hash_text(data, hash_text_encoding="utf-8"): 105 | text = str(data) 106 | text_hash = hashlib.sha256(text.encode(hash_text_encoding)).hexdigest() 107 | return text_hash 108 | 109 | def __repr__(self): 110 | _txt = f"Spectrum({self.filepath.name}, len={self.spectrum_length})" 111 | return _txt 112 | 113 | def quickplot(self): 114 | """Plot for quickly checking the spectrum""" 115 | try: 116 | self.spectrum.plot(x="ramanshift", y="intensity") 117 | except TypeError: 118 | logger.warning("No numeric data to plot") 119 | -------------------------------------------------------------------------------- /src/raman_fitting/interfaces/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/interfaces/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/interfaces/argparse_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | 6 | from raman_fitting.config.path_settings import RunModes 7 | from loguru import logger 8 | from .utils import get_package_version 9 | 10 | 11 | def main(): 12 | """ 13 | The command line interface for raman_fitting 14 | """ 15 | 16 | parser = argparse.ArgumentParser( 17 | description="Command-line interface for raman_fitting package main." 
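# Illustrative invocations, assuming the installed console script is named
# `raman_fitting` (flag names as defined below; the sample and group names are hypothetical):
#   raman_fitting --run-mode examples
#   raman_fitting -sGrps test -sIDs testDW38C --fit_model_specific_names 2peaks 3peaks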
18 | ) 19 | 20 | parser.add_argument( 21 | "-M", 22 | "-m", 23 | "--run-mode", 24 | type=RunModes, 25 | # choices=, 26 | help="running mode of package, for testing", 27 | default="normal", 28 | ) 29 | 30 | parser.add_argument( 31 | "-sIDs", 32 | "--sample_ids", 33 | nargs="+", 34 | default=[], 35 | help="Selection of names of SampleIDs from index to run over.", 36 | ) 37 | 38 | parser.add_argument( 39 | "-sGrps", 40 | "--sample_groups", 41 | nargs="+", 42 | default=[], 43 | help="Selection of names of sample groups from index to run over.", 44 | ) 45 | 46 | parser.add_argument( 47 | "--fit_model_specific_names", 48 | nargs="+", 49 | default=[], 50 | help="Selection of names of the composite LMfit models to use for fitting.", 51 | ) 52 | 53 | parser.add_argument( 54 | "--version", 55 | action="version", 56 | version="%(prog)s {}".format(get_package_version()), 57 | help="Prints out the current version of the raman_fitting distribution, via importlib.metadata.version", 58 | ) 59 | 60 | # Execute the parse_args() method 61 | args = parser.parse_args() 62 | 63 | # import the raman_fitting package 64 | import raman_fitting as rf 65 | 66 | extra_kwargs = {} 67 | if args.run_mode == RunModes.EXAMPLES: 68 | extra_kwargs.update( 69 | {"fit_model_specific_names": ["2peaks", "3peaks", "4peaks"]} 70 | ) 71 | logger.info(f"Starting raman_fitting with CLI args:\n{args}") 72 | kwargs = {**vars(args), **extra_kwargs} 73 | _main_run = rf.MainDelegator(**kwargs) 74 | -------------------------------------------------------------------------------- /src/raman_fitting/interfaces/typer_cli.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | from typing_extensions import Annotated 3 | 4 | from pathlib import Path 5 | from enum import StrEnum, auto 6 | from loguru import logger 7 | from raman_fitting.config.path_settings import RunModes 8 | from raman_fitting.delegating.main_delegator import MainDelegator 9 | from raman_fitting.imports.files.file_indexer import initialize_index_from_source_files 10 | from .utils import get_package_version 11 | 12 | import typer 13 | 14 | 15 | class MakeTypes(StrEnum): 16 | INDEX = auto() 17 | CONFIG = auto() 18 | EXAMPLE = auto() 19 | 20 | 21 | __version__ = "0.1.0" 22 | 23 | 24 | def version_callback(value: bool): 25 | if value: 26 | package_version = get_package_version() 27 | typer_cli_version = f"Awesome Typer CLI Version: {__version__}" 28 | print(f"{package_version}\n{typer_cli_version}") 29 | raise typer.Exit() 30 | 31 | 32 | app = typer.Typer() 33 | state = {"verbose": False} 34 | 35 | 36 | @app.command() 37 | def run( 38 | models: Annotated[ 39 | List[str], 40 | typer.Option( 41 | default_factory=list, help="Selection of models to use for deconvolution." 
42 | ), 43 | ], 44 | sample_ids: Annotated[ 45 | List[str], 46 | typer.Option( 47 | default_factory=list, 48 | help="Selection of names of SampleIDs from index to run over.", 49 | ), 50 | ], 51 | group_ids: Annotated[ 52 | List[str], 53 | typer.Option( 54 | default_factory=list, 55 | help="Selection of names of sample groups from index to run over.", 56 | ), 57 | ], 58 | fit_models: Annotated[ 59 | List[str], 60 | typer.Option( 61 | default_factory=list, 62 | help="Selection of names of the composite LMfit models to use for fitting.", 63 | ), 64 | ], 65 | run_mode: Annotated[RunModes, typer.Argument()] = RunModes.NORMAL, 66 | multiprocessing: Annotated[bool, typer.Option("--multiprocessing")] = False, 67 | ): 68 | if run_mode is None: 69 | print("No make run mode passed") 70 | raise typer.Exit() 71 | kwargs = {"run_mode": run_mode, "use_multiprocessing": multiprocessing} 72 | if run_mode == RunModes.EXAMPLES: 73 | kwargs.update( 74 | { 75 | "fit_model_specific_names": [ 76 | "2peaks", 77 | "3peaks", 78 | "4peaks", 79 | "2nd_4peaks", 80 | ], 81 | "sample_groups": ["test"], 82 | } 83 | ) 84 | logger.info(f"Starting raman_fitting with CLI args:\n{run_mode}") 85 | _main_run = MainDelegator(**kwargs) 86 | 87 | 88 | @app.command() 89 | def make( 90 | make_type: Annotated[MakeTypes, typer.Argument()], 91 | source_files: Annotated[List[Path], typer.Option()], 92 | index_file: Annotated[Path, typer.Option()] = None, 93 | force_reindex: Annotated[bool, typer.Option("--force-reindex")] = False, 94 | ): 95 | if make_type is None: 96 | print("No make type args passed") 97 | raise typer.Exit() 98 | if index_file: 99 | index_file = index_file.resolve() 100 | if make_type == MakeTypes.INDEX: 101 | initialize_index_from_source_files( 102 | files=source_files, index_file=index_file, force_reindex=force_reindex 103 | ) 104 | 105 | elif make_type == MakeTypes.CONFIG: 106 | pass # make config 107 | 108 | 109 | @app.callback() 110 | def main( 111 | verbose: bool = False, 112 | version: Annotated[ 113 | Optional[bool], typer.Option("--version", callback=version_callback) 114 | ] = None, 115 | ): 116 | """ 117 | Manage raman_fitting in the awesome CLI app. 
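Illustrative usage, assuming the console script is installed as `raman_fitting`
(command and option names follow the Typer app defined above):

    raman_fitting run examples
    raman_fitting make index --source-files spectrum1.txt --source-files spectrum2.txt --force-reindex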
118 | """ 119 | if verbose: 120 | print("Will write verbose output") 121 | state["verbose"] = True 122 | 123 | 124 | if __name__ == "__main__": 125 | app() 126 | -------------------------------------------------------------------------------- /src/raman_fitting/interfaces/utils.py: -------------------------------------------------------------------------------- 1 | def get_package_version() -> str: 2 | try: 3 | import importlib.metadata 4 | 5 | _version = importlib.metadata.version("raman_fitting") 6 | except ImportError: 7 | _version = "version.not.found" 8 | 9 | _version_text = f"raman_fitting version: {_version}" 10 | return _version_text 11 | -------------------------------------------------------------------------------- /src/raman_fitting/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/models/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/models/deconvolution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/models/deconvolution/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/models/deconvolution/base_model.py: -------------------------------------------------------------------------------- 1 | """The members of the validated collection of BasePeaks are assembled here into fitting Models""" 2 | 3 | import logging 4 | from typing import Optional, Dict 5 | from warnings import warn 6 | 7 | from lmfit.models import Model as LMFitModel 8 | from pydantic import ( 9 | BaseModel, 10 | Field, 11 | ConfigDict, 12 | model_validator, 13 | ) 14 | 15 | 16 | from raman_fitting.models.deconvolution.base_peak import ( 17 | BasePeak, 18 | get_peaks_from_peak_definitions, 19 | ) 20 | from raman_fitting.models.deconvolution.lmfit_parameter import ( 21 | construct_lmfit_model_from_components, 22 | ) 23 | from raman_fitting.models.splitter import RegionNames 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | SUBSTRATE_PEAK = "Si1_peak" 28 | SEP = "+" 29 | SUFFIX = "_" 30 | 31 | 32 | class BaseLMFitModelWarning(UserWarning): 33 | pass 34 | 35 | 36 | class BaseLMFitModel(BaseModel): 37 | """ 38 | This Model class combines the collection of valid peaks from BasePeak into a regression model 39 | of type lmfit.model.CompositeModel 40 | that is compatible with the lmfit Model and fit functions. 41 | The model_name, include_substrate and lmfit_model attributes are kept 42 | consistent w.r.t. their meaning when they are set. 
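A model is specified as a '+'-separated string of peak names, e.g. (illustrative;
the available peak and region names come from the default_models toml definitions):

    BaseLMFitModel(name="2peaks", peaks="G+D", region_name="first_order")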
43 | 44 | Parameters 45 | -------- 46 | verbose_name: string ==> is converted to lmfit Model object 47 | include_substrate: bool ==> toggle between True and False to include a substrate peak 48 | 49 | """ 50 | 51 | model_config = ConfigDict(arbitrary_types_allowed=True) 52 | 53 | name: str 54 | peaks: str 55 | peak_collection: Dict[str, BasePeak] = Field( 56 | default_factory=get_peaks_from_peak_definitions, 57 | validate_default=True, 58 | repr=False, 59 | ) 60 | lmfit_model: LMFitModel = Field(None, init_var=False, repr=False) 61 | region_name: RegionNames 62 | 63 | @property 64 | def has_substrate(self): 65 | if not self.lmfit_model.components: 66 | return False 67 | comps = set(map(lambda x: x.prefix, self.lmfit_model.components)) 68 | substrate_comps = set( 69 | [i.lmfit_model.prefix for i in self.substrate_peaks.values()] 70 | ) 71 | return substrate_comps.issubset(comps) 72 | 73 | def add_substrate(self): 74 | if self.has_substrate: 75 | warn( 76 | f"{self.__class__.__name__} already has substrate.", 77 | BaseLMFitModelWarning, 78 | ) 79 | return 80 | 81 | for name in self.substrate_peaks.keys(): 82 | self.peaks += SEP + name 83 | self.check_lmfit_model() 84 | 85 | def remove_substrate(self): 86 | if not self.has_substrate: 87 | warn( 88 | f"{self.__class__.__name__} has no substrate to remove.", 89 | BaseLMFitModelWarning, 90 | ) 91 | return 92 | _peaks = self.peaks.split(SEP) 93 | for name in self.substrate_peaks.keys(): 94 | _peaks.remove(name) 95 | self.peaks = SEP.join(_peaks) 96 | self.check_lmfit_model() 97 | 98 | @property 99 | def substrate_peaks(self): 100 | return {k: val for k, val in self.peak_collection.items() if val.is_substrate} 101 | 102 | @model_validator(mode="after") 103 | def check_peaks_in_peak_collection(self) -> "BaseLMFitModel": 104 | peak_names_split = self.peaks.split(SEP) 105 | default_peak_names = self.peak_collection.keys() 106 | valid_peaks = set(peak_names_split).union(set(default_peak_names)) 107 | assert valid_peaks 108 | new_peak_names = SEP.join([i for i in peak_names_split if i in valid_peaks]) 109 | self.peaks = new_peak_names 110 | return self 111 | 112 | @model_validator(mode="after") 113 | def check_lmfit_model(self) -> "BaseLMFitModel": 114 | lmfit_model = construct_lmfit_model(self.peaks, self.peak_collection) 115 | self.lmfit_model = lmfit_model 116 | return self 117 | 118 | 119 | def construct_lmfit_model( 120 | peaks: str, peak_collection: Dict[str, BasePeak] 121 | ) -> LMFitModel: 122 | peak_names = peaks.split(SEP) 123 | base_peaks = [peak_collection[i] for i in peak_names if i in peak_collection] 124 | if not base_peaks: 125 | raise ValueError(f"Could not find matching peaks for {peaks}") 126 | base_peaks_lmfit = [i.lmfit_model for i in base_peaks] 127 | lmfit_model = construct_lmfit_model_from_components(base_peaks_lmfit) 128 | return lmfit_model 129 | 130 | 131 | def get_models_and_peaks_from_definitions( 132 | models_and_peaks_definitions: Optional[Dict] = None, 133 | ) -> Dict[str, Dict[str, BaseLMFitModel]]: 134 | peak_collection = get_peaks_from_peak_definitions( 135 | peak_definitions=models_and_peaks_definitions 136 | ) 137 | models_settings = { 138 | k: val.get("models") 139 | for k, val in models_and_peaks_definitions.items() 140 | if "models" in val 141 | } 142 | all_models = {} 143 | for region_name, region_model_settings in models_settings.items(): 144 | if region_model_settings is None: 145 | continue 146 | all_models[region_name] = {} 147 | for model_name, model_peaks in region_model_settings.items(): 148 | 
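# each entry wraps a ready-to-fit lmfit composite model, keyed per region and model name,
# e.g. all_models["first_order"]["2peaks"] (illustrative; names depend on the toml definitions)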
all_models[region_name][model_name] = BaseLMFitModel( 149 | name=model_name, 150 | peaks=model_peaks, 151 | peak_collection=peak_collection, 152 | region_name=region_name, 153 | ) 154 | return all_models 155 | 156 | 157 | def main(): 158 | models = get_models_and_peaks_from_definitions() 159 | print("Models: ", len(models)) 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | -------------------------------------------------------------------------------- /src/raman_fitting/models/deconvolution/base_peak.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import List, Optional, Dict 3 | 4 | from pydantic import ( 5 | BaseModel, 6 | ConfigDict, 7 | InstanceOf, 8 | Field, 9 | field_validator, 10 | model_validator, 11 | ) 12 | from lmfit import Parameters 13 | from lmfit.models import Model 14 | 15 | from raman_fitting.models.deconvolution.lmfit_parameter import ( 16 | LMFIT_MODEL_MAPPER, 17 | LMFitParameterHints, 18 | parmeter_to_dict, 19 | ) 20 | from raman_fitting.config.default_models import load_config_from_toml_files 21 | from raman_fitting.utils.string_operations import prepare_text_from_param 22 | 23 | ParamHintDict = Dict[str, Dict[str, Optional[float | bool | str]]] 24 | 25 | 26 | class BasePeakWarning(UserWarning): # pragma: no cover 27 | pass 28 | 29 | 30 | PEAK_TYPE_OPTIONS = StrEnum("PEAK_TYPE_OPTIONS", ["Lorentzian", "Gaussian", "Voigt"]) 31 | 32 | 33 | def get_lmfit_model_from_peak_type(peak_type: str, prefix: str = "") -> Optional[Model]: 34 | """returns the lmfit model instance according to the chosen peak type and sets the prefix from peak_name""" 35 | model = None 36 | 37 | capitalized = peak_type.capitalize() 38 | try: 39 | lmfit_model_class = LMFIT_MODEL_MAPPER[capitalized] 40 | model = lmfit_model_class(prefix=prefix) 41 | except IndexError: 42 | raise NotImplementedError( 43 | f'This peak type or model "{peak_type}" has not been implemented.' 44 | ) 45 | return model 46 | 47 | 48 | class BasePeak(BaseModel): 49 | """ 50 | -------- 51 | Example usage 52 | -------- 53 | Base class for easier definition of typical intensity peaks found in the 54 | raman spectra. 55 | 56 | The go al of is this metaclass is to be able to more easily write 57 | peak class definitions (for possible user input). It tries to find three 58 | fields in the definition, which are requiredfor a LMfit model creation, 59 | namely: peak_name, peak_type and the param hints. 60 | 61 | peak_name: 62 | arbitrary name as prefix for the peak 63 | peak_type: 64 | defines the lineshape of the peak, the following options are implemented: 65 | "Lorentzian", "Gaussian", "Voigt" 66 | params_hints: 67 | initial values for the parameters of the peak, at least 68 | a value for the center position of the peak should be given. 69 | 70 | It tries to find these fields in different sources such as: the class definition 71 | with only class attributes, init attributes or even in the keywords arguments. 72 | The FieldsTracker class instance (fco) keeps track of the definition in different 73 | sources and can check when all are ready. If there are multiple sources with definitions 74 | for the same field than the source with highest priority will be chosen (based on tuple order). 75 | Each field is a propery which validates the assigments. 76 | 77 | Sort of wrapper for lmfit.model definition. 
78 | Several of these peaks combined are used to make the lmfit CompositeModel 79 | (composed in the fit_models module), which will be used for the fit. 80 | 81 | -------- 82 | Example usage 83 | -------- 84 | 85 | "Example class definition with attribute definitions" 86 | class New_peak(metaclass=BasePeak): 87 | "New peak child class for easier definition" 88 | 89 | param_hints = { 'center': {'value': 2435,'min': 2400, 'max': 2550}} 90 | peak_type = 'Voigt' #'Voigt' 91 | peak_name ='R2D2' 92 | 93 | New_peak().lmfit_model == 94 | 95 | "Example class definition with keyword arguments" 96 | 97 | New_peak = BasePeak('new', 98 | peak_name='D1', 99 | peak_type= 'Lorentzian', 100 | param_hints = { 'center': {'value': 1500}} 101 | ) 102 | New_peak() 103 | """ 104 | 105 | model_config = ConfigDict(arbitrary_types_allowed=True, from_attributes=True) 106 | 107 | peak_name: str 108 | param_hints: Optional[Parameters | List[LMFitParameterHints] | ParamHintDict] = None 109 | peak_type: Optional[str] = None 110 | is_substrate: Optional[bool] = False 111 | is_for_normalization: Optional[bool] = False 112 | docstring: Optional[str] = Field(None, repr=False) 113 | lmfit_model: Optional[InstanceOf[Model]] = None 114 | 115 | @field_validator("peak_type") 116 | @classmethod 117 | def check_peak_type(cls, v: Optional[str]) -> Optional[str]: 118 | if v is None: 119 | return v 120 | if isinstance(v, str): 121 | try: 122 | v = PEAK_TYPE_OPTIONS[v].name 123 | return v 124 | except KeyError: 125 | raise KeyError( 126 | f"peak_type is not in {map(lambda x: x.name, PEAK_TYPE_OPTIONS)}, but {v}" 127 | ) 128 | elif isinstance(v, PEAK_TYPE_OPTIONS): 129 | v = v.name 130 | return v 131 | else: 132 | raise TypeError(f"peak_type is not a string or enum, but {type(v)}") 133 | 134 | @field_validator("param_hints") 135 | @classmethod 136 | def check_param_hints( 137 | cls, v: Optional[Parameters | List[LMFitParameterHints] | ParamHintDict] 138 | ) -> Optional[Parameters]: 139 | if v is None: 140 | return v 141 | if isinstance(v, Parameters): 142 | return v 143 | 144 | if isinstance(v, dict): 145 | valid_p_hints = [LMFitParameterHints(name=k, **val) for k, val in v.items()] 146 | 147 | if isinstance(v, list): 148 | assert all(isinstance(i, LMFitParameterHints) for i in v) 149 | 150 | pars_hints = [i.parameter for i in valid_p_hints] 151 | params = Parameters() 152 | params.add_many(*pars_hints) 153 | return params 154 | 155 | @model_validator(mode="after") 156 | def check_lmfit_model(self) -> "BasePeak": 157 | if self.lmfit_model is not None: 158 | if isinstance(self.lmfit_model, Model): 159 | return self 160 | else: 161 | raise ValueError( 162 | f"lmfit_model is not a Model instance, but {type(self.lmfit_model)}" 163 | ) 164 | peak_type = self.peak_type 165 | if peak_type is None: 166 | raise ValueError("peak_type is None") 167 | 168 | lmfit_model = get_lmfit_model_from_peak_type( 169 | peak_type, prefix=self.peak_name_prefix 170 | ) 171 | if lmfit_model is None: 172 | raise ValueError("lmfit_model is None") 173 | 174 | if self.param_hints is not None: 175 | for k, v in self.param_hints.items(): 176 | par_dict = parmeter_to_dict(v) 177 | lmfit_model.set_param_hint(k, **par_dict) 178 | self.lmfit_model = lmfit_model 179 | return self 180 | 181 | @property 182 | def peak_name_prefix(self): 183 | if not self.peak_name: 184 | return "" 185 | if self.peak_name.endswith("_"): 186 | return self.peak_name 187 | return self.peak_name + "_" 188 | 189 | def __str__(self): 190 | _repr = f"{self.__class__.__name__}('{self.peak_name}'" 191 
| if self.lmfit_model is None: 192 | _repr += ": no Model set" 193 | _repr += f", {self.lmfit_model}" 194 | param_text = make_string_from_param_hints(self.param_hints) 195 | _repr += f"{param_text})" 196 | return _repr 197 | 198 | 199 | def make_string_from_param_hints(param_hints: Parameters) -> str: 200 | param_center = param_hints.get("center", {}) 201 | text = prepare_text_from_param(param_center) 202 | return text 203 | 204 | 205 | def get_peaks_from_peak_definitions( 206 | peak_definitions: Optional[Dict] = None, 207 | ) -> Dict[str, BasePeak]: 208 | if peak_definitions is None: 209 | peak_definitions = load_config_from_toml_files() 210 | peak_settings = { 211 | k: val.get("peaks") for k, val in peak_definitions.items() if "peaks" in val 212 | } 213 | peak_models = {} 214 | for peak_type, peak_type_defs in peak_settings.items(): 215 | if peak_type_defs is None: 216 | continue 217 | for peak_name, peak_def in peak_type_defs.items(): 218 | peak_models[peak_name] = BasePeak(**peak_def) 219 | return peak_models 220 | -------------------------------------------------------------------------------- /src/raman_fitting/models/deconvolution/init_models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | import logging 3 | from typing import Dict 4 | 5 | from raman_fitting.config.default_models import load_config_from_toml_files 6 | from raman_fitting.models.deconvolution.base_model import ( 7 | get_models_and_peaks_from_definitions, 8 | ) 9 | from .base_model import BaseLMFitModel 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | @dataclass 15 | class InitializeModels: 16 | """ 17 | This class will initialize and validate the different fitting models. 18 | The models are of type lmfit.model.CompositeModel and stored in a dict with names 19 | for the models as keys. 
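Illustrative access (region and model names depend on the toml definitions):

    models = InitializeModels()
    models.lmfit_models["first_order"]["2peaks"].lmfit_model  # -> lmfit CompositeModel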
20 | """ 21 | 22 | model_definitions: dict = field(default_factory=dict) 23 | peaks: dict = field(default_factory=dict) 24 | lmfit_models: Dict[str, Dict[str, BaseLMFitModel]] | None = None 25 | 26 | def __post_init__(self): 27 | self.model_definitions = self.model_definitions or {} 28 | self.peaks = self.peaks or {} 29 | self.lmfit_models = self.lmfit_models or {} 30 | if not self.model_definitions: 31 | self.model_definitions = load_config_from_toml_files() 32 | if not self.lmfit_models and self.model_definitions: 33 | self.lmfit_models = get_models_and_peaks_from_definitions( 34 | self.model_definitions 35 | ) 36 | 37 | def __repr__(self): 38 | _t = ", ".join(map(str, self.lmfit_models.keys())) 39 | _t += "\n" 40 | _t += "\n".join(map(str, self.lmfit_models.values())) 41 | return _t 42 | 43 | 44 | def main(): 45 | from raman_fitting.config.default_models import ( 46 | load_config_from_toml_files, 47 | ) 48 | 49 | model_definitions = load_config_from_toml_files() 50 | print("model_definitions: ", model_definitions) 51 | models = InitializeModels() 52 | print(models) 53 | # breakpoint() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /src/raman_fitting/models/deconvolution/lmfit_parameter.py: -------------------------------------------------------------------------------- 1 | import math 2 | from enum import StrEnum 3 | from typing import List, Optional, Dict 4 | from warnings import warn 5 | 6 | from lmfit import Parameter 7 | from lmfit.models import GaussianModel, LorentzianModel, Model, VoigtModel 8 | 9 | from pydantic import ( 10 | BaseModel, 11 | ConfigDict, 12 | Field, 13 | model_validator, 14 | ) 15 | 16 | 17 | param_hint_dict = Dict[str, Dict[str, Optional[float | bool | str]]] 18 | 19 | 20 | class BasePeakWarning(UserWarning): # pragma: no cover 21 | pass 22 | 23 | 24 | PEAK_TYPE_OPTIONS = StrEnum("PEAK_TYPE_OPTIONS", ["Lorentzian", "Gaussian", "Voigt"]) 25 | 26 | LMFIT_PARAM_KWARGS = ("value", "vary", "min", "max", "expr") 27 | 28 | 29 | LMFIT_MODEL_MAPPER = { 30 | "Lorentzian": LorentzianModel, 31 | "Gaussian": GaussianModel, 32 | "Voigt": VoigtModel, 33 | } 34 | 35 | 36 | class LMFitParameterHints(BaseModel): 37 | """ 38 | https://github.com/lmfit/lmfit-py/blob/master/lmfit/model.py#L566 39 | 40 | The given hint can include optional bounds and constraints 41 | ``(value, vary, min, max, expr)``, which will be used by 42 | `Model.make_params()` when building default parameters. 43 | 44 | While this can be used to set initial values, `Model.make_params` or 45 | the function `create_params` should be preferred for creating 46 | parameters with initial values. 47 | 48 | The intended use here is to control how a Model should create 49 | parameters, such as setting bounds that are required by the mathematics 50 | of the model (for example, that a peak width cannot be negative), or to 51 | define common constrained parameters. 52 | 53 | Parameters 54 | ---------- 55 | name : str 56 | Parameter name, can include the models `prefix` or not. 57 | **kwargs : optional 58 | Arbitrary keyword arguments, needs to be a Parameter attribute. 59 | Can be any of the following: 60 | 61 | - value : float, optional 62 | Numerical Parameter value. 63 | - vary : bool, optional 64 | Whether the Parameter is varied during a fit (default is 65 | True). 66 | - min : float, optional 67 | Lower bound for value (default is ``-numpy.inf``, no lower 68 | bound). 
69 | - max : float, optional 70 | Upper bound for value (default is ``numpy.inf``, no upper 71 | bound). 72 | - expr : str, optional 73 | Mathematical expression used to constrain the value during 74 | the fit. 75 | 76 | Example 77 | -------- 78 | >>> model = GaussianModel() 79 | >>> model.set_param_hint('sigma', min=0) 80 | 81 | """ 82 | 83 | model_config = ConfigDict(arbitrary_types_allowed=True, from_attributes=True) 84 | 85 | name: str 86 | value: Optional[float] 87 | vary: Optional[bool] = True 88 | min: Optional[float] = Field(-math.inf, allow_inf_nan=True) 89 | max: Optional[float] = Field(math.inf, allow_inf_nan=True) 90 | expr: Optional[str] = None 91 | parameter: Optional[Parameter] = Field(None, exclude=True) 92 | 93 | @model_validator(mode="after") 94 | def check_min_max(self) -> "LMFitParameterHints": 95 | min_, max_ = self.min, self.max 96 | if min_ is not None and max_ is not None and min_ > max_: 97 | raise ValueError("Min must be less than max") 98 | return self 99 | 100 | @model_validator(mode="after") 101 | def check_value_min_max(self) -> "LMFitParameterHints": 102 | value, min_, max_ = self.value, self.min, self.max 103 | if value is None: 104 | raise ValueError("Value must not be None") 105 | if min_ is not None: 106 | assert value >= min_ 107 | if max_ is not None: 108 | assert value <= max_ 109 | if max_ and min_: 110 | assert min_ <= value <= max_ 111 | assert min_ < max_ 112 | return self 113 | 114 | @model_validator(mode="after") 115 | def check_construct_parameter(self) -> "LMFitParameterHints": 116 | if self.parameter is None: 117 | self.parameter = Parameter( 118 | name=self.name, 119 | value=self.value, 120 | vary=self.vary, 121 | min=self.min, 122 | max=self.max, 123 | expr=self.expr, 124 | ) 125 | return self 126 | 127 | 128 | def construct_lmfit_model_from_components( 129 | models: List[Model], sort_on_center=True 130 | ) -> "Model": 131 | """ 132 | Construct the lmfit model from a collection of (known) peaks 133 | """ 134 | if not models: 135 | raise ValueError("No peaks given to construct lmfit model from.") 136 | if sort_on_center: 137 | models = sort_lmfit_models(models) 138 | lmfit_composite_model = sum(models, models.pop()) 139 | return lmfit_composite_model 140 | 141 | 142 | def sort_lmfit_models( 143 | models: List[Model], key: str = "center", reverse: bool = False 144 | ) -> List[Model]: 145 | try: 146 | sorted_models = sorted( 147 | models, key=lambda x: x.param_hints[key]["value"], reverse=reverse 148 | ) 149 | except KeyError: 150 | warn(f"Sorting on model on key {key} failed") 151 | return sorted_models 152 | 153 | 154 | def parmeter_to_dict(parameter: Parameter) -> dict: 155 | ret = {k: getattr(parameter, k) for k in LMFIT_PARAM_KWARGS} 156 | ret = {k: v for k, v in ret.items() if v is not None} 157 | return ret 158 | 159 | 160 | DEFAULT_GAMMA_PARAM_HINT = LMFitParameterHints( 161 | name="gamma", value=1, min=1e-05, max=70, vary=False 162 | ) 163 | 164 | 165 | def main(): 166 | pass 167 | # breakpoint() 168 | 169 | 170 | if __name__ == "__main__": 171 | main() 172 | -------------------------------------------------------------------------------- /src/raman_fitting/models/deconvolution/spectrum_regions.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import Dict 3 | 4 | from pydantic import BaseModel 5 | from raman_fitting.config.default_models import load_config_from_toml_files 6 | 7 | 8 | def get_default_regions_from_toml_files() -> Dict[str, Dict[str, float]]: 
9 | default_regions = ( 10 | load_config_from_toml_files().get("spectrum", {}).get("regions", {}) 11 | ) 12 | return default_regions 13 | 14 | 15 | RegionNames = StrEnum( 16 | "RegionNames", " ".join(get_default_regions_from_toml_files()), module=__name__ 17 | ) 18 | 19 | 20 | class SpectrumRegionLimits(BaseModel): 21 | name: RegionNames 22 | min: int 23 | max: int 24 | extra_margin: int = 20 25 | -------------------------------------------------------------------------------- /src/raman_fitting/models/fit_models.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import time 3 | 4 | from pydantic import BaseModel, model_validator, Field, ConfigDict 5 | from lmfit import Model as LMFitModel 6 | from lmfit.model import ModelResult 7 | 8 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel 9 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames 10 | from raman_fitting.models.post_deconvolution.calculate_params import ( 11 | calculate_ratio_of_unique_vars_in_results, 12 | ) 13 | 14 | from raman_fitting.models.spectrum import SpectrumData 15 | 16 | 17 | class SpectrumFitModel(BaseModel): 18 | model_config = ConfigDict(arbitrary_types_allowed=True) 19 | 20 | spectrum: SpectrumData 21 | model: BaseLMFitModel 22 | region: RegionNames 23 | fit_kwargs: Dict = Field(default_factory=dict, repr=False) 24 | fit_result: ModelResult = Field(None, init_var=False) 25 | param_results: Dict = Field(default_factory=dict) 26 | elapsed_time: float = Field(0, init_var=False, repr=False) 27 | 28 | @model_validator(mode="after") 29 | def match_region_names(self) -> "SpectrumFitModel": 30 | model_region = self.model.region_name 31 | spec_region = self.spectrum.region_name 32 | if model_region != spec_region: 33 | raise ValueError( 34 | f"Region names do not match {model_region} and {spec_region}" 35 | ) 36 | return self 37 | 38 | def run_fit(self) -> None: 39 | if "method" not in self.fit_kwargs: 40 | self.fit_kwargs["method"] = "leastsq" 41 | lmfit_model = self.model.lmfit_model 42 | start_time = time.time() 43 | fit_result = call_fit_on_model(lmfit_model, self.spectrum, **self.fit_kwargs) 44 | end_time = time.time() 45 | elapsed_seconds = abs(start_time - end_time) 46 | self.elapsed_time = elapsed_seconds 47 | self.fit_result = fit_result 48 | self.post_process() 49 | 50 | def post_process(self): 51 | if not self.fit_result: 52 | return 53 | param_results = self.fit_result.params.valuesdict() 54 | params_ratio_vars = calculate_ratio_of_unique_vars_in_results( 55 | param_results, raise_exception=False 56 | ) 57 | self.param_results["ratios"] = params_ratio_vars 58 | 59 | 60 | def call_fit_on_model( 61 | model: LMFitModel, spectrum: SpectrumData, method="leastsq", **kwargs 62 | ) -> ModelResult: 63 | # ideas: improve fitting loop so that starting parameters from modelX and modelX+Si are shared, faster... 
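# a sketch of that idea (not implemented here): fit the plain model first, then seed the
# +Si substrate variant by copying the shared best-fit parameter values into its init_params
# before calling model.fit(), so the second fit starts close to the optimum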
64 | init_params = model.make_params() 65 | x, y = spectrum.ramanshift, spectrum.intensity 66 | out = model.fit(y, init_params, x=x, method=method, **kwargs) # 'leastsq' 67 | return out 68 | -------------------------------------------------------------------------------- /src/raman_fitting/models/post_deconvolution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/models/post_deconvolution/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/models/post_deconvolution/calculate_params.py: -------------------------------------------------------------------------------- 1 | from inspect import getmembers, isfunction 2 | from typing import Dict, Any 3 | 4 | from raman_fitting.models.post_deconvolution import parameter_ratio_funcs 5 | 6 | RATIO_FUNC_PREFIX = "ratio_" 7 | functions = [ 8 | fn 9 | for _, fn in getmembers(parameter_ratio_funcs, isfunction) 10 | if fn.__module__ == parameter_ratio_funcs.__name__ 11 | ] 12 | ratio_funcs = list( 13 | filter(lambda x: x.__name__.startswith(RATIO_FUNC_PREFIX), functions) 14 | ) 15 | 16 | 17 | def calculate_params_from_results( 18 | combined_results: Dict, 19 | var_name: str, 20 | prefix: str | None = None, 21 | raise_exception=True, 22 | ) -> dict[str, dict[str, Any]]: 23 | results = {} 24 | for ratio_func in ratio_funcs: 25 | try: 26 | label, ratio = ratio_func(combined_results, var_name, prefix=prefix) 27 | func = ratio_func.__name__ 28 | results[func] = {"label": label, "ratio": ratio} 29 | except (ValueError, KeyError) as e: 30 | if raise_exception: 31 | raise e from e 32 | continue 33 | return results 34 | 35 | 36 | def calculate_ratio_of_unique_vars_in_results( 37 | results: Dict, raise_exception: bool = True 38 | ) -> dict[Any, dict[str, dict[str, Any]]]: 39 | uniq_vars = set(i.split("_")[-1] for i in results.keys()) 40 | var_ratios = {} 41 | for var_name in uniq_vars: 42 | ratios = calculate_params_from_results( 43 | results, var_name, raise_exception=raise_exception 44 | ) 45 | var_ratios[var_name] = ratios 46 | return var_ratios 47 | 48 | 49 | def main(): 50 | print(functions) 51 | print(list(map(str, ratio_funcs))) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /src/raman_fitting/models/post_deconvolution/parameter_ratio_funcs.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Dict 2 | from functools import wraps 3 | 4 | from raman_fitting.utils.decorators import decorator_with_kwargs 5 | from raman_fitting.utils.string_operations import join_prefix_suffix 6 | 7 | 8 | def validate_result(result, var_name: str, requires: List[str] | None = None): 9 | req_vars = {join_prefix_suffix(i, var_name) for i in requires} 10 | provided_vars = {join_prefix_suffix(i, var_name) for i in result.keys()} 11 | if provided_vars < req_vars: 12 | raise ValueError( 13 | f"Missing required vars {req_vars} in result: {', '.join(result.keys())}" 14 | ) 15 | 16 | 17 | @decorator_with_kwargs 18 | def calculate_ratio(function, requires: List[str] | None = None): 19 | @wraps(function) 20 | def wrapper(result, var_name: str, prefix: str | None = None, **kwargs): 21 | validate_result(result, var_name, requires=requires) 22 | prefix = prefix or "" 23 | return function(result, var_name, 
prefix=prefix) 24 | 25 | return wrapper 26 | 27 | 28 | def get_var(peak: str, result: Dict, var_name: str): 29 | return result[join_prefix_suffix(peak.upper(), var_name)] 30 | 31 | 32 | @calculate_ratio(requires=["D", "G"]) 33 | def ratio_d_to_g(result, var_name: str, prefix: str | None = None) -> Tuple[str, float]: 34 | d_ = get_var("D", result, var_name) 35 | g_ = get_var("G", result, var_name) 36 | ratio = d_ / g_ 37 | label = f"{prefix}D/{prefix}G" 38 | return label, ratio 39 | 40 | 41 | @calculate_ratio(requires=["D", "G"]) 42 | def ratio_la_d_to_g( 43 | result, var_name: str, prefix: str | None = None 44 | ) -> Tuple[str, float]: 45 | ratio = 4.4 * (ratio_d_to_g(result, var_name, prefix=prefix)[-1]) ** -1 46 | label = f"La_{prefix}G" 47 | return label, ratio 48 | 49 | 50 | @calculate_ratio(requires=["D", "G", "D2"]) 51 | def ratio_d_to_gplusd2( 52 | result, var_name: str, prefix: str | None = None 53 | ) -> Tuple[str, float] | None: 54 | d = get_var("D", result, var_name) 55 | g = get_var("G", result, var_name) 56 | d2 = get_var("D2", result, var_name) 57 | ratio = d / (g + d2) 58 | label = f"{prefix}D/({prefix}G+{prefix}D2)" 59 | return label, ratio 60 | 61 | 62 | @calculate_ratio(requires=["D", "G", "D2"]) 63 | def ratio_la_d_to_gplusd2( 64 | result, var_name: str, prefix: str | None = None 65 | ) -> Tuple[str, float]: 66 | ratio = 4.4 * (ratio_d_to_gplusd2(result, var_name, prefix=prefix)[-1]) ** -1 67 | label = (f"La_{prefix}G+D2",) 68 | return label, ratio 69 | 70 | 71 | @calculate_ratio(requires=["D2", "G", "D3"]) 72 | def ratio_d3_to_gplusd2( 73 | result, var_name: str, prefix: str | None = None 74 | ) -> Tuple[str, float] | None: 75 | d2 = get_var("D2", result, var_name) 76 | d3 = get_var("D3", result, var_name) 77 | g = get_var("G", result, var_name) 78 | ratio = d3 / (g + d2) 79 | label = f"{prefix}D3/({prefix}G+{prefix}D2" 80 | return label, ratio 81 | 82 | 83 | @calculate_ratio(requires=["D3", "G"]) 84 | def ratio_d3_to_g( 85 | result, var_name: str, prefix: str | None = None 86 | ) -> Tuple[str, float] | None: 87 | d3 = get_var("D3", result, var_name) 88 | g = get_var("G", result, var_name) 89 | ratio = d3 / g 90 | label = f"{prefix}D3/{prefix}G" 91 | return label, ratio 92 | 93 | 94 | @calculate_ratio(requires=["D4", "G"]) 95 | def ratio_d4_to_g( 96 | result, var_name: str, prefix: str | None = None 97 | ) -> Tuple[str, float] | None: 98 | d4 = get_var("D4", result, var_name) 99 | g = get_var("G", result, var_name) 100 | ratio = d4 / g 101 | label = f"{prefix}D4/{prefix}G" 102 | return label, ratio 103 | 104 | 105 | @calculate_ratio(requires=["D1D1", "D"]) 106 | def ratio_d1d1_to_d(result, var_name: str, prefix: str | None = None): 107 | d1d1 = get_var("D1D1", result, var_name) 108 | d = get_var("D", result, var_name) 109 | ratio = 8.8 * d1d1 / d 110 | label = f"Leq_{prefix}" 111 | return label, ratio 112 | 113 | 114 | @calculate_ratio(requires=["D1D1", "GD1"]) 115 | def ratio_d1d1_to_gd1( 116 | result, var_name: str, prefix: str | None = None 117 | ) -> Tuple[str, float]: 118 | d1d1 = get_var("D1D1", result, var_name) 119 | gd1 = get_var("GD1", result, var_name) 120 | ratio = d1d1 / gd1 121 | label = f"{prefix}D1D1/{prefix}GD1" 122 | 123 | return label, ratio 124 | 125 | 126 | if __name__ == "__main__": 127 | result = {"D_peak": 1, "G_peak": 2, "D1D1_peak": 3} 128 | var_name = "peak" 129 | print(ratio_d_to_g(result, var_name)) 130 | -------------------------------------------------------------------------------- /src/raman_fitting/models/spectrum.py: 
-------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | import numpy as np 3 | 4 | from pydantic import ( 5 | BaseModel, 6 | FilePath, 7 | AwareDatetime, 8 | model_validator, 9 | Field, 10 | ) 11 | import pydantic_numpy.typing as pnd 12 | 13 | 14 | class SpectrumData(BaseModel): 15 | ramanshift: pnd.Np1DArrayFp32 = Field(repr=False) 16 | intensity: pnd.Np1DArrayFp32 = Field(repr=False) 17 | label: str 18 | region_name: str | None = None 19 | source: FilePath | Sequence[FilePath] | str | Sequence[str] | None = None 20 | 21 | @model_validator(mode="after") 22 | def validate_equal_length(self): 23 | if len(self.ramanshift) != len(self.intensity): 24 | raise ValueError("Spectrum arrays are not of equal length.") 25 | return self 26 | 27 | @model_validator(mode="after") 28 | def check_if_contains_nan(self): 29 | if np.isnan(self.ramanshift).any(): 30 | raise ValueError("Ramanshift contains NaN") 31 | 32 | if np.isnan(self.intensity).any(): 33 | raise ValueError("Intensity contains NaN") 34 | return self 35 | 36 | # length is derived property 37 | def __len__(self): 38 | return len(self.ramanshift) 39 | 40 | 41 | class SpectrumMetaData(BaseModel): 42 | sample_id: str 43 | sample_group: str 44 | sample_position: str 45 | creation_date: AwareDatetime 46 | source_file: FilePath # FileStem is derived 47 | -------------------------------------------------------------------------------- /src/raman_fitting/models/splitter.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | import numpy as np 3 | 4 | from pydantic import BaseModel, model_validator, Field 5 | from .spectrum import SpectrumData 6 | from .deconvolution.spectrum_regions import ( 7 | SpectrumRegionLimits, 8 | RegionNames, 9 | get_default_regions_from_toml_files, 10 | ) 11 | 12 | 13 | class SplitSpectrum(BaseModel): 14 | spectrum: SpectrumData 15 | region_limits: Dict[str, SpectrumRegionLimits] = Field(None, init_var=None) 16 | spec_regions: Dict[str, SpectrumData] = Field(None, init_var=None) 17 | info: Dict[str, Any] = Field(default_factory=dict) 18 | 19 | @model_validator(mode="after") 20 | def process_spectrum(self) -> "SplitSpectrum": 21 | if self.region_limits is None: 22 | region_limits = get_default_spectrum_region_limits() 23 | self.region_limits = region_limits 24 | 25 | if self.spec_regions is not None: 26 | return self 27 | spec_regions = split_spectrum_data_in_regions( 28 | self.spectrum.ramanshift, 29 | self.spectrum.intensity, 30 | spec_region_limits=self.region_limits, 31 | label=self.spectrum.label, 32 | source=self.spectrum.source, 33 | ) 34 | self.spec_regions = spec_regions 35 | return self 36 | 37 | def get_region(self, region_name: RegionNames): 38 | region_name = RegionNames(region_name) 39 | spec_region_keys = [ 40 | i for i in self.spec_regions.keys() if region_name.name in i 41 | ] 42 | if len(spec_region_keys) != 1: 43 | raise ValueError(f"Key {region_name} not in {spec_region_keys}") 44 | spec_region_key = spec_region_keys[0] 45 | return self.spec_regions[spec_region_key] 46 | 47 | 48 | def get_default_spectrum_region_limits( 49 | regions_mapping: Dict = None, 50 | ) -> Dict[str, SpectrumRegionLimits]: 51 | if regions_mapping is None: 52 | regions_mapping = get_default_regions_from_toml_files() 53 | regions = {} 54 | for region_name, region_config in regions_mapping.items(): 55 | regions[region_name] = SpectrumRegionLimits(name=region_name, **region_config) 56 | return regions 57 | 58 | 
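# Minimal usage sketch (illustrative; the region names and limits come from the
# spectrum_regions.toml defaults):
#
#   import numpy as np
#   spec = SpectrumData(
#       ramanshift=np.linspace(0, 3600, 1600),
#       intensity=np.random.rand(1600),
#       label="raw",
#   )
#   split = SplitSpectrum(spectrum=spec)
#   first_order = split.get_region("first_order")  # -> SpectrumData clipped to that region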
59 | def split_spectrum_data_in_regions( 60 | ramanshift: np.array, 61 | intensity: np.array, 62 | spec_region_limits=None, 63 | label=None, 64 | source=None, 65 | ) -> Dict[str, SpectrumData]: 66 | """ 67 | For splitting of spectra into the several SpectrumRegionLimits, 68 | the names of the regions are taken from SpectrumRegionLimits 69 | and set as attributes to the instance. 70 | """ 71 | 72 | if spec_region_limits is None: 73 | spec_region_limits = get_default_spectrum_region_limits() 74 | spec_regions = {} 75 | for region_name, region in spec_region_limits.items(): 76 | # find indices of region in ramanshift array 77 | ind = (ramanshift >= np.min(region.min)) & (ramanshift <= np.max(region.max)) 78 | region_lbl = f"region_{region_name}" 79 | if label is not None and label not in region_lbl: 80 | region_lbl = f"{label}_{region_lbl}" 81 | _data = { 82 | "ramanshift": ramanshift[ind], 83 | "intensity": intensity[ind], 84 | "label": region_lbl, 85 | "region_name": region_name, 86 | "source": source, 87 | } 88 | spec_regions[region_lbl] = SpectrumData(**_data) 89 | return spec_regions 90 | -------------------------------------------------------------------------------- /src/raman_fitting/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/processing/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/processing/baseline_subtraction.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from scipy.stats import linregress 5 | 6 | from ..models.splitter import SplitSpectrum 7 | from ..models.spectrum import SpectrumData 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def subtract_baseline_per_region(spec: SpectrumData, split_spectrum: SplitSpectrum): 13 | ramanshift = spec.ramanshift 14 | intensity = spec.intensity 15 | region_name = spec.region_name 16 | label = spec.label 17 | regions_data = split_spectrum.spec_regions 18 | region_limits = split_spectrum.region_limits 19 | selected_intensity = intensity 20 | region_config = region_limits[region_name] 21 | region_name_first_order = list( 22 | filter(lambda x: "first_order" in x, regions_data.keys()) 23 | ) 24 | if ( 25 | any((i in region_name or i in label) for i in ("full", "norm")) 26 | and region_name_first_order 27 | ): 28 | selected_intensity = regions_data[region_name_first_order[0]].intensity 29 | region_config = region_limits["first_order"] 30 | 31 | bl_linear = linregress( 32 | ramanshift[[0, -1]], 33 | [ 34 | np.mean(selected_intensity[0 : region_config.extra_margin]), 35 | np.mean(selected_intensity[-region_config.extra_margin : :]), 36 | ], 37 | ) 38 | i_blcor = intensity - (bl_linear[0] * ramanshift + bl_linear[1]) 39 | return i_blcor, bl_linear 40 | 41 | 42 | def subtract_baseline_from_split_spectrum( 43 | split_spectrum: SplitSpectrum = None, label=None 44 | ) -> SplitSpectrum: 45 | _bl_spec_regions = {} 46 | _info = {} 47 | label = "blcorr" if label is None else label 48 | for region_name, spec in split_spectrum.spec_regions.items(): 49 | blcorr_int, blcorr_lin = subtract_baseline_per_region(spec, split_spectrum) 50 | new_label = f"{label}_{spec.label}" if label not in spec.label else spec.label 51 | spec = SpectrumData( 52 | **{ 53 | "ramanshift": spec.ramanshift, 54 | "intensity": blcorr_int, 55 | "label": 
new_label, 56 | "region_name": region_name, 57 | "source": spec.source, 58 | } 59 | ) 60 | _bl_spec_regions.update(**{region_name: spec}) 61 | _info.update(**{region_name: blcorr_lin}) 62 | bl_corrected_spectra = split_spectrum.model_copy( 63 | update={"spec_regions": _bl_spec_regions, "info": _info} 64 | ) 65 | return bl_corrected_spectra 66 | -------------------------------------------------------------------------------- /src/raman_fitting/processing/despike.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Mon May 3 11:10:59 2021 3 | 4 | @author: dw 5 | """ 6 | 7 | from typing import Dict, Tuple, Any, Optional 8 | import copy 9 | import logging 10 | 11 | import numpy as np 12 | 13 | from pydantic import BaseModel, Field, model_validator 14 | 15 | from raman_fitting.models.spectrum import SpectrumData 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class SpectrumDespiker(BaseModel): 21 | spectrum: Optional[SpectrumData] = None 22 | threshold_z_value: int = 4 23 | moving_region_size: int = 1 24 | ignore_lims: Tuple[int, int] = (20, 46) 25 | info: Dict = Field(default_factory=dict) 26 | processed_spectrum: SpectrumData = Field(None) 27 | 28 | @model_validator(mode="after") 29 | def process_spectrum(self) -> "SpectrumDespiker": 30 | if self.spectrum is None: 31 | raise ValueError("SpectrumDespiker, spectrum is None") 32 | despiked_intensity, result_info = self.call_despike_spectrum( 33 | self.spectrum.intensity 34 | ) 35 | despiked_spec = self.spectrum.model_copy( 36 | update={"intensity": despiked_intensity}, deep=True 37 | ) 38 | SpectrumData.model_validate(despiked_spec, from_attributes=True) 39 | self.processed_spectrum = despiked_spec 40 | self.info.update(**result_info) 41 | return self 42 | 43 | def process_intensity(self, intensity: np.ndarray) -> np.ndarray: 44 | despiked_intensity, _ = self.call_despike_spectrum(intensity) 45 | return despiked_intensity 46 | 47 | def call_despike_spectrum(self, intensity: np.ndarray) -> Tuple[np.ndarray, Dict]: 48 | despiked_intensity, result_info = despike_spectrum( 49 | intensity, 50 | self.threshold_z_value, 51 | self.moving_region_size, 52 | ignore_lims=self.ignore_lims, 53 | ) 54 | return despiked_intensity, result_info 55 | 56 | 57 | def despike_spectrum( 58 | intensity: np.ndarray, 59 | threshold_z_value: int, 60 | moving_region_size: int, 61 | ignore_lims=(20, 46), 62 | ) -> Tuple[np.ndarray, Dict[str, Any]]: 63 | """ 64 | A despiking algorithm from the reference literature: 65 | https://doi.org/10.1016/j.chemolab.2018.06.009 66 | 67 | Parameters 68 | ---------- 69 | intensity : np.ndarray 70 | The intensity array for which the despiked intensity will be calculated. 71 | threshold_z_value : int 72 | Absolute modified Z-score above which a point is flagged as a spike. 73 | moving_region_size : int 74 | Half-width of the window used to interpolate over a detected spike. 75 | Returns 76 | ------- 77 | (despiked_intensity, result) : Tuple[np.ndarray, Dict] 78 | The despiked intensity (same length as the input) and a dict with the intermediate Z-score arrays. 79 | Notes 80 | ----- 81 | Let Y1, ..., Yn represent the values of a single Raman spectrum recorded at 82 | equally spaced wavenumbers. From this series, form the detrended differenced 83 | series dYt = Yt+1 - Yt (np.diff). This simple data processing step annihilates 84 | linear and slow-moving curvilinear trends, whereas sharp localised spikes are preserved. 85 | Using the median and the median absolute deviation (MAD) of dYt, the modified 86 | Z-scores are Z(t) = 0.6745 * (dYt - median(dYt)) / MAD(dYt). Reference: D.A. Whitaker, K. Hayes.
Chemometrics and Intelligent Laboratory Systems 179 (2018) 82–84 87 | """ 88 | 89 | z_intensity = calc_z_value_intensity(intensity) 90 | filtered_z_intensity = filter_z_intensity_values(z_intensity, threshold_z_value) 91 | i_despiked = despike_filter( 92 | intensity, filtered_z_intensity, moving_region_size, ignore_lims=ignore_lims 93 | ) 94 | result = {"z_intensity": z_intensity, "filtered_z_intensity": filtered_z_intensity} 95 | return i_despiked, result 96 | 97 | 98 | def calc_z_value_intensity(intensity: np.ndarray) -> np.ndarray: 99 | diff_intensity = np.append(np.diff(intensity), 0) # dYt 100 | median_diff_intensity = np.median(diff_intensity) # dYt_Median 101 | median_abs_deviation = np.median(abs(diff_intensity - median_diff_intensity)) 102 | intensity_values_z = ( 103 | 0.6745 * (diff_intensity - median_diff_intensity) 104 | ) / median_abs_deviation 105 | return intensity_values_z 106 | 107 | 108 | def filter_z_intensity_values(z_intensity, threshold_z_value): 109 | filtered_z_intensity = copy.deepcopy(z_intensity) 110 | filtered_z_intensity[np.abs(z_intensity) > threshold_z_value] = np.nan # mark spike positions as NaN 111 | filtered_z_intensity[0] = filtered_z_intensity[-1] = 0 112 | return filtered_z_intensity 113 | 114 | 115 | def despike_filter( 116 | intensity: np.ndarray, 117 | filtered_z_intensity: np.ndarray, 118 | moving_region_size: int, 119 | ignore_lims=(20, 46), 120 | ): 121 | n = len(intensity) 122 | i_despiked = copy.deepcopy(intensity) 123 | spikes = np.nonzero(np.isnan(filtered_z_intensity)) 124 | for i in list(spikes[0]): 125 | if i < ignore_lims[0] or i > ignore_lims[1]: # spikes inside ignore_lims are kept as-is 126 | w = np.arange( 127 | max(0, i - moving_region_size), min(n, i + moving_region_size) 128 | ) 129 | w = w[~np.isnan(filtered_z_intensity[w])] 130 | if intensity[w].any(): 131 | i_despiked[i] = np.mean(intensity[w]) # replace the spike by the mean of its non-spike neighbours 132 | else: 133 | i_despiked[i] = intensity[i] 134 | return i_despiked 135 | -------------------------------------------------------------------------------- /src/raman_fitting/processing/filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dataclasses import dataclass 5 | from typing import Callable, Protocol, Tuple, Dict 6 | import numpy as np 7 | from scipy import signal 8 | 9 | from raman_fitting.models.spectrum import SpectrumData 10 | 11 | 12 | class IntensityProcessor(Protocol): 13 | def process_intensity(self, intensity: np.ndarray) -> np.ndarray: ...
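# Illustrative sketch (not part of this module): IntensityFilter below and
# SpectrumDespiker in despike.py both provide process_intensity() and therefore
# satisfy this structural Protocol, so either can be passed wherever an
# IntensityProcessor is expected. A hypothetical helper chaining several
# processors over one intensity array could look like:
#
#     def run_intensity_processors(intensity, processors):
#         for processor in processors:  # each step returns a new intensity array
#             intensity = processor.process_intensity(intensity)
#         return intensity
#
# The helper name and signature above are assumptions for illustration only.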
14 | 15 | 16 | @dataclass 17 | class IntensityFilter: 18 | name: str 19 | filter_func: Callable 20 | filter_args: Tuple 21 | filter_kwargs: Dict 22 | 23 | def process_intensity(self, intensity: np.ndarray) -> np.ndarray: 24 | if intensity is None: 25 | raise ValueError("no intensity given to filter") 26 | filtered_intensity = self.filter_func( 27 | intensity, *self.filter_args, **self.filter_kwargs 28 | ) 29 | return filtered_intensity 30 | 31 | 32 | available_filters = { 33 | "savgol_filter": IntensityFilter( 34 | "savgol_filter", 35 | signal.savgol_filter, 36 | filter_args=(13, 3), 37 | filter_kwargs=dict(mode="nearest"), 38 | ) 39 | } 40 | 41 | 42 | def filter_spectrum( 43 | spectrum: SpectrumData = None, filter_name="savgol_filter" 44 | ) -> SpectrumData: 45 | if filter_name not in available_filters: 46 | raise ValueError(f"Chosen filter {filter_name} not available.") 47 | 48 | filter_class = available_filters[filter_name] 49 | filtered_intensity = filter_class.process_intensity(spectrum.intensity) 50 | label = f"{filter_name}_{spectrum.label}" 51 | filtered_spectrum = spectrum.model_copy( 52 | update={"intensity": filtered_intensity, "label": label} 53 | ) 54 | return filtered_spectrum 55 | 56 | 57 | """ 58 | Parameters 59 | ---------- 60 | ramanshift : array or list 61 | collection of the ramanshift values 62 | intensity : array or list 63 | collection of the intensity values 64 | label : TYPE, optional 65 | DESCRIPTION. The default is "". 66 | **kwargs : TYPE 67 | DESCRIPTION. 68 | 69 | Returns 70 | ------- 71 | None. 72 | """ 73 | -------------------------------------------------------------------------------- /src/raman_fitting/processing/normalization.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | 5 | from ..models.splitter import SplitSpectrum 6 | from ..models.spectrum import SpectrumData 7 | from ..models.fit_models import SpectrumFitModel, LMFitModel 8 | 9 | from loguru import logger 10 | 11 | 12 | def get_simple_normalization_intensity(split_spectrum: SplitSpectrum) -> float: 13 | norm_spec = split_spectrum.get_region("normalization") 14 | normalization_intensity = np.nanmax(norm_spec.intensity) 15 | return normalization_intensity 16 | 17 | 18 | def get_normalization_factor( 19 | split_spectrum: SplitSpectrum, 20 | norm_method="simple", 21 | normalization_model: LMFitModel = None, 22 | ) -> float: 23 | simple_norm = get_simple_normalization_intensity(split_spectrum) 24 | normalization_intensity = simple_norm 25 | 26 | if "fit" in norm_method and normalization_model is not None: 27 | fit_norm = normalizer_fit_model( 28 | split_spectrum, normalization_model=normalization_model 29 | ) 30 | if fit_norm is not None: 31 | normalization_intensity = fit_norm 32 | norm_factor = 1 / normalization_intensity 33 | 34 | return norm_factor 35 | 36 | 37 | def normalize_regions_in_split_spectrum( 38 | split_spectrum: SplitSpectrum, norm_factor: float, label: Optional[str] = None 39 | ) -> SplitSpectrum: 40 | norm_spec_regions = {} 41 | norm_infos = {} 42 | label = split_spectrum.spectrum.label if label is None else label 43 | for region_name, spec in split_spectrum.spec_regions.items(): 44 | norm_label = f"{region_name}_{label}" if region_name not in label else label 45 | norm_label = f"norm_{norm_label}" if "norm" not in norm_label else norm_label 46 | # label looks like "norm_regionname_label" 47 | _data = SpectrumData( 48 | **{ 49 | "ramanshift": spec.ramanshift, 50 | "intensity": 
spec.intensity * norm_factor, 51 | "label": norm_label, 52 | "region_name": region_name, 53 | "source": spec.source, 54 | } 55 | ) 56 | norm_spec_regions.update(**{region_name: _data}) 57 | norm_infos.update(**{region_name: {"normalization_factor": norm_factor}}) 58 | norm_spectra = split_spectrum.model_copy( 59 | update={"spec_regions": norm_spec_regions, "info": norm_infos} 60 | ) 61 | return norm_spectra 62 | 63 | 64 | def normalize_split_spectrum( 65 | split_spectrum: SplitSpectrum = None, 66 | ) -> SplitSpectrum: 67 | "Normalize the spectrum intensity according to the normalization method." 68 | normalization_factor = get_normalization_factor(split_spectrum) 69 | norm_data = normalize_regions_in_split_spectrum( 70 | split_spectrum, normalization_factor 71 | ) 72 | return norm_data 73 | 74 | 75 | def normalizer_fit_model( 76 | spectrum: SpectrumData, normalization_model: LMFitModel 77 | ) -> float | None: 78 | spec_fit = SpectrumFitModel(spectrum=spectrum, model=normalization_model) 79 | spec_fit.run_fit() 80 | if not spec_fit.fit_result: 81 | return None 82 | try: 83 | return spec_fit.fit_result.params["G_height"].value 84 | except KeyError as e: 85 | logger.error(e) 86 | -------------------------------------------------------------------------------- /src/raman_fitting/processing/post_processing.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Protocol 3 | 4 | from raman_fitting.models.spectrum import SpectrumData 5 | 6 | from .baseline_subtraction import subtract_baseline_from_split_spectrum 7 | from .filter import filter_spectrum 8 | from .despike import SpectrumDespiker 9 | from ..models.splitter import SplitSpectrum 10 | from .normalization import normalize_split_spectrum 11 | 12 | 13 | class PreProcessor(Protocol): 14 | def process_spectrum(self, spectrum: SpectrumData = None): ... 15 | 16 | 17 | class PostProcessor(Protocol): 18 | def process_spectrum(self, split_spectrum: SplitSpectrum = None): ...
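# The SpectrumProcessor dataclass below chains these steps in a fixed order: the raw
# SpectrumData is smoothed (filter_spectrum) and despiked (SpectrumDespiker), then
# split into regions (SplitSpectrum), baseline-subtracted and normalized. A minimal
# manual-usage sketch, assuming `spectrum` is a valid SpectrumData instance (the
# region key shown is the one used in the test suite and may differ per configuration):
#
#     processor = SpectrumProcessor(spectrum)   # processing runs in __post_init__
#     clean = processor.clean_spectrum          # SplitSpectrum with processed regions
#     first_order = clean.spec_regions["savgol_filter_raw_region_first_order"]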
19 | 20 | 21 | @dataclass 22 | class SpectrumProcessor: 23 | spectrum: SpectrumData 24 | processed: bool = False 25 | clean_spectrum: SplitSpectrum | None = None 26 | 27 | def __post_init__(self): 28 | processed_spectrum = self.process_spectrum() 29 | self.clean_spectrum = processed_spectrum 30 | self.processed = True 31 | 32 | def process_spectrum(self) -> SplitSpectrum: 33 | pre_processed_spectrum = self.pre_process_intensity(spectrum=self.spectrum) 34 | post_processed_spectra = self.post_process_spectrum( 35 | spectrum=pre_processed_spectrum 36 | ) 37 | return post_processed_spectra 38 | 39 | def pre_process_intensity(self, spectrum: SpectrumData = None) -> SpectrumData: 40 | filtered_spectrum = filter_spectrum(spectrum=spectrum) 41 | despiker = SpectrumDespiker(spectrum=filtered_spectrum) 42 | return despiker.processed_spectrum 43 | 44 | def post_process_spectrum(self, spectrum: SpectrumData = None) -> SplitSpectrum: 45 | split_spectrum = SplitSpectrum(spectrum=spectrum) 46 | baseline_subtracted = subtract_baseline_from_split_spectrum( 47 | split_spectrum=split_spectrum 48 | ) 49 | normalized_spectra = normalize_split_spectrum( 50 | split_spectrum=baseline_subtracted 51 | ) 52 | return normalized_spectra 53 | -------------------------------------------------------------------------------- /src/raman_fitting/types.py: -------------------------------------------------------------------------------- 1 | from typing import TypeAlias, Dict 2 | 3 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel 4 | from raman_fitting.models.fit_models import SpectrumFitModel 5 | 6 | LMFitModelCollection: TypeAlias = Dict[str, Dict[str, BaseLMFitModel]] 7 | SpectrumFitModelCollection: TypeAlias = Dict[str, Dict[str, SpectrumFitModel]] 8 | -------------------------------------------------------------------------------- /src/raman_fitting/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/utils/__init__.py -------------------------------------------------------------------------------- /src/raman_fitting/utils/decorators.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from functools import wraps, partial 3 | from inspect import signature 4 | 5 | 6 | def decorator_with_kwargs(decorator: Callable) -> Callable: 7 | """ 8 | Source: https://gist.github.com/ramonrosa/402af55633e9b6c273882ac074760426 9 | Decorator factory to give decorated decorators the skill to receive 10 | optional keyword arguments. 11 | If a decorator "some_decorator" is decorated with this function: 12 | @decorator_with_kwargs 13 | def some_decorator(decorated_function, kwarg1=1, kwarg2=2): 14 | def wrapper(*decorated_function_args, **decorated_function_kwargs): 15 | '''Modifies the behavior of decorated_function according 16 | to the value of kwarg1 and kwarg2''' 17 | ... 18 | return wrapper 19 | It will be usable in the following ways: 20 | @some_decorator 21 | def func(x): 22 | ... 23 | @some_decorator() 24 | def func(x): 25 | ... 26 | @some_decorator(kwarg1=3) # or other combinations of kwargs 27 | def func(x, y): 28 | ... 
29 | :param decorator: decorator to be given optional kwargs-handling skills 30 | :type decorator: Callable 31 | :raises TypeError: if the decorator does not receive a single Callable or 32 | keyword arguments 33 | :raises TypeError: if the signature of the decorated decorator does not 34 | conform to: Callable, **keyword_arguments 35 | :return: modified decorator 36 | :rtype: Callable 37 | """ 38 | 39 | @wraps(decorator) 40 | def decorator_wrapper(*args, **kwargs): 41 | if (len(kwargs) == 0) and (len(args) == 1) and callable(args[0]): 42 | return decorator(args[0]) 43 | if len(args) == 0: 44 | return partial(decorator, **kwargs) 45 | raise TypeError( 46 | f"{decorator.__name__} expects either a single Callable " 47 | "or keyword arguments" 48 | ) 49 | 50 | signature_values = signature(decorator).parameters.values() 51 | signature_args = [ 52 | param.name for param in signature_values if param.default == param.empty 53 | ] 54 | 55 | if len(signature_args) != 1: 56 | raise TypeError( 57 | f"{decorator.__name__} signature should be of the form:\n" 58 | f"{decorator.__name__}(function: typing.Callable, " 59 | "kwarg_1=default_1, kwarg_2=default_2, ...) -> Callable" 60 | ) 61 | 62 | return decorator_wrapper 63 | -------------------------------------------------------------------------------- /src/raman_fitting/utils/file_reader.py: -------------------------------------------------------------------------------- 1 | """ Class for reading in files, can be extended for other than txt formats""" 2 | 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | 7 | 8 | class FileReader: 9 | def __init__(self, file_path=Path()): 10 | self._file_path = file_path 11 | self.read_in() 12 | 13 | def read_in(self): 14 | ramanshift, intensity_raw = np.array([]), np.array([]) 15 | i = 0 16 | while not ramanshift.any(): 17 | try: 18 | ramanshift, intensity_raw = np.loadtxt( 19 | self._file_path, usecols=(0, 1), unpack=True, skiprows=i 20 | ) 21 | print(self._file_path, len(ramanshift), len(intensity_raw)) 22 | self._skiprows = i 23 | self._read_succes = True 24 | except ValueError: 25 | i += 1 26 | 27 | self.ramanshift = ramanshift 28 | self.intensity_raw = intensity_raw 29 | -------------------------------------------------------------------------------- /src/raman_fitting/utils/string_operations.py: -------------------------------------------------------------------------------- 1 | from lmfit.parameter import Parameter 2 | 3 | 4 | def join_prefix_suffix(prefix: str, suffix: str) -> str: 5 | prefix_ = prefix.rstrip("_") 6 | suffix_ = suffix.lstrip("_") 7 | if suffix_ in prefix: 8 | return prefix_ 9 | return f"{prefix_}_{suffix_}" 10 | 11 | 12 | def prepare_text_from_param(param: Parameter) -> str: 13 | text = "" 14 | if not param: 15 | return text 16 | _ptext = "" 17 | _val = param.value 18 | _min = param.min 19 | if _min != _val: 20 | _ptext += f"{_min} < " 21 | _ptext += f"{_val}" 22 | _max = param.max 23 | if _max != _val: 24 | _ptext += f" > {_max}" 25 | text += f", center : {_ptext}" 26 | return text 27 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration 
file for pytest and commonly used fixtures 3 | """ 4 | 5 | import pytest 6 | from raman_fitting.config import settings 7 | from raman_fitting.config.path_settings import InternalPathSettings 8 | 9 | # Global fixtures 10 | 11 | 12 | @pytest.fixture(autouse=True) 13 | def tmp_raman_dir(tmp_path): 14 | d = tmp_path / "raman-fitting" 15 | d.mkdir() 16 | yield d 17 | d.rmdir() 18 | 19 | 20 | @pytest.fixture(autouse=True) 21 | def internal_paths(): 22 | return InternalPathSettings() 23 | 24 | 25 | @pytest.fixture(autouse=True) 26 | def example_files(internal_paths): 27 | example_files = list(internal_paths.example_fixtures.rglob("*txt")) 28 | return example_files 29 | 30 | 31 | @pytest.fixture(autouse=True) 32 | def default_definitions(internal_paths): 33 | return settings.default_definitions 34 | 35 | 36 | @pytest.fixture(autouse=True) 37 | def default_models(internal_paths): 38 | return settings.default_models 39 | 40 | 41 | @pytest.fixture(autouse=True) 42 | def default_models_first_order(default_models): 43 | return default_models.get("first_order") 44 | 45 | 46 | @pytest.fixture(autouse=True) 47 | def default_models_second_order(default_models): 48 | return default_models.get("second_order") 49 | -------------------------------------------------------------------------------- /tests/deconvolution_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/deconvolution_models/__init__.py -------------------------------------------------------------------------------- /tests/deconvolution_models/test_base_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Sun Jun 6 09:35:02 2021 3 | 4 | @author: DW 5 | """ 6 | 7 | import pytest 8 | from functools import partial 9 | 10 | from pydantic import ValidationError 11 | 12 | from raman_fitting.models.deconvolution.base_model import ( 13 | SUBSTRATE_PEAK, 14 | BaseLMFitModel, 15 | ) 16 | 17 | SUBSTRATE_PREFIX = SUBSTRATE_PEAK.split("peak")[0] 18 | 19 | 20 | def helper_get_list_components(bm): 21 | _listcompsprefix = partial(map, lambda x,: getattr(x, "prefix")) 22 | _bm_prefix = list(_listcompsprefix(bm.lmfit_model.components)) 23 | return _bm_prefix 24 | 25 | 26 | def test_empty_base_model(): 27 | with pytest.raises(ValidationError): 28 | BaseLMFitModel() 29 | with pytest.raises(ValidationError): 30 | BaseLMFitModel(name="Test_empty") 31 | 32 | with pytest.raises(ValidationError): 33 | BaseLMFitModel(peaks="A+B") 34 | 35 | with pytest.raises(ValidationError): 36 | BaseLMFitModel(name="Test_empty", peaks="A+B", region_name="full") 37 | 38 | 39 | def test_base_model_2peaks(): 40 | bm = BaseLMFitModel(name="Test_2peaks", peaks="K2+D+G", region_name="full") 41 | assert set(helper_get_list_components(bm)) == set(["D_", "G_"]) 42 | bm.add_substrate() 43 | assert set(helper_get_list_components(bm)) == set(["D_", "G_", SUBSTRATE_PREFIX]) 44 | bm.remove_substrate() 45 | assert set(helper_get_list_components(bm)) == set(["D_", "G_"]) 46 | 47 | 48 | def test_base_model_wrong_chars_model_name(): 49 | bm = BaseLMFitModel( 50 | name="Test_wrong_chars", 51 | peaks="K2+---////+ +7 +K1111+1D+D2", 52 | region_name="full", 53 | ) 54 | assert set(helper_get_list_components(bm)) == set(["D2_"]) 55 | bm.add_substrate() 56 | assert set(helper_get_list_components(bm)) == set(["D2_", SUBSTRATE_PREFIX]) 57 | 
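The tests above pin down how BaseLMFitModel parses a peaks string into lmfit components; as a quick illustration (a sketch relying only on the behaviour asserted in these tests, not on additional API), a model built from known default peak names exposes its components via their prefixes:

    from raman_fitting.models.deconvolution.base_model import BaseLMFitModel

    bm = BaseLMFitModel(name="example", peaks="D+G", region_name="full")
    print([c.prefix for c in bm.lmfit_model.components])  # expected: ['D_', 'G_']
    bm.add_substrate()    # appends the configured substrate peak component
    bm.remove_substrate()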
-------------------------------------------------------------------------------- /tests/deconvolution_models/test_base_peaks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pytest 3 | 4 | from pydantic import ValidationError 5 | from raman_fitting.models.deconvolution.base_peak import ( 6 | BasePeak, 7 | ) 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | logging.captureWarnings(True) # sends these warning to the logger 12 | 13 | 14 | def _error_message_contains(excinfo, testmsg: str, verbose: bool = False): 15 | _fltr_str = [ 16 | i if i not in ["(", ")"] else " " 17 | for i in str(excinfo.value) 18 | if i.isalnum() or i in (",", ".", " ", "_", "(", ")") 19 | ] 20 | _cl_str = "".join(map(str, _fltr_str)) 21 | _cl_str_split = _cl_str.split(" ") 22 | _test = all(i in _cl_str_split for i in testmsg.split(" ")) 23 | 24 | if not _test: 25 | _test = any(i in _cl_str_split for i in testmsg.split(" ")) 26 | 27 | if not _test or verbose: 28 | print(list(((i, i in _cl_str_split) for i in testmsg.split(" ")))) 29 | print(_cl_str_split) 30 | return _test 31 | 32 | 33 | def test_basepeak_initialization(): 34 | with pytest.raises(ValidationError): 35 | BasePeak() 36 | with pytest.raises(ValidationError): 37 | BasePeak(peak_name="test") 38 | with pytest.raises(ValidationError): 39 | BasePeak(peak_type="Voigt") 40 | test_peak = BasePeak(peak_name="test", peak_type="Voigt") 41 | assert test_peak.peak_name == "test" 42 | 43 | 44 | @pytest.mark.skip(reason="TODO: add field validations") 45 | def test_empty_base_class_with_kwargs_raises(): 46 | eb = BasePeak(peak_type="Voigt", peak_name="test") 47 | 48 | assert eb.peak_type == "Voigt" 49 | 50 | # add in field validation str_length 51 | with pytest.raises(ValueError) as excinfo: 52 | eb.peak_name = 10 * "emptytest" 53 | assert _error_message_contains(excinfo, "value for peak_name is too long 90") 54 | 55 | # add built in field validation for peak_type 56 | with pytest.raises(ValueError) as excinfo: 57 | eb.peak_type = "VoigtLorentzian" 58 | assert _error_message_contains( 59 | excinfo, 60 | ''''Multiple options ['Lorentzian', 'Voigt'] for misspelled value "VoigtLorentzian"''', 61 | ) 62 | 63 | 64 | def test_base_class_good_with_init_extra_tests(): 65 | td1_kwargs = dict( 66 | peak_type="Voigt", 67 | peak_name="D1D1", 68 | param_hints={ 69 | "center": {"value": 2650, "min": 2600, "max": 2750}, 70 | "sigma": {"value": 60, "min": 1, "max": 200}, 71 | "amplitude": {"value": 14, "min": 1e-03, "max": 100}, 72 | }, 73 | ) 74 | 75 | td1 = BasePeak(**td1_kwargs) 76 | assert td1.peak_type == "Voigt" 77 | assert td1.peak_name == "D1D1" 78 | peakmod = "" 79 | assert str(td1.lmfit_model) == peakmod 80 | # _class_str = f"center : 2600 < 2650 > 2750" 81 | # assertIn(_class_str, str(td1)) 82 | # dont test attr setters 83 | # td1.peak_name = "R2D2" 84 | # assert td1.lmfit_model.prefix == "R2D2_" 85 | 86 | 87 | def test_base_class_good_with_init(): 88 | d1_kwargs = dict( 89 | peak_name="D1D1", 90 | peak_type="Gaussian", 91 | param_hints={ 92 | "center": {"value": 2650, "min": 2600, "max": 2750}, 93 | "sigma": {"value": 60, "min": 1, "max": 200}, 94 | "amplitude": {"value": 14, "min": 1e-03, "max": 100}, 95 | }, 96 | ) 97 | 98 | td1 = BasePeak(**d1_kwargs) 99 | assert td1.peak_name == d1_kwargs["peak_name"] 100 | 101 | 102 | def test_base_class_good_with_init_added_method(): 103 | tkwargs = dict( 104 | peak_type="Lorentzian", 105 | peak_name="D1D1", 106 | param_hints={ 107 | "center": {"value": 2650, "min": 
2600, "max": 2750}, 108 | "sigma": {"value": 60, "min": 1, "max": 200}, 109 | "amplitude": {"value": 14, "min": 1e-03, "max": 100}, 110 | }, 111 | ) 112 | 113 | td1m = BasePeak(**tkwargs) 114 | assert td1m.peak_type == tkwargs["peak_type"] 115 | 116 | 117 | def test_base_class_good_with_attributes_and_init(): 118 | tkwargs = dict( 119 | param_hints={ 120 | "center": {"value": 2435, "min": 2400, "max": 2550}, 121 | "sigma": {"value": 30, "min": 1, "max": 200}, 122 | "amplitude": {"value": 2, "min": 1e-03, "max": 100}, 123 | }, 124 | peak_type="Voigt", 125 | peak_name="R2D2", 126 | ) 127 | 128 | nca = BasePeak(**tkwargs) 129 | _center_value = nca.lmfit_model.param_hints["center"]["value"] 130 | assert _center_value == 2435 131 | 132 | 133 | def test_base_class_good_with_attributes_no_init(): 134 | tkwargs = dict( 135 | param_hints={ 136 | "center": {"value": 2435, "min": 2400, "max": 2550}, 137 | "sigma": {"value": 30, "min": 1, "max": 200}, 138 | "amplitude": {"value": 2, "min": 1e-03, "max": 100}, 139 | }, 140 | peak_type="Voigt", 141 | peak_name="R2D2", 142 | ) 143 | 144 | ncni = BasePeak(**tkwargs) 145 | assert ncni.param_hints["center"].value == 2435 146 | assert ncni.lmfit_model.param_hints["center"]["value"] == 2435 147 | 148 | 149 | def test_base_class_good_with_attributes_init_collision_values(): 150 | tkwargs = dict( 151 | param_hints={ 152 | "center": {"value": 2435, "min": 2400, "max": 2550}, 153 | "sigma": {"value": 30, "min": 1, "max": 200}, 154 | "amplitude": {"value": 2, "min": 1e-03, "max": 100}, 155 | }, 156 | peak_type="Voigt", 157 | peak_name="R2D2", 158 | ) 159 | nci = BasePeak(**tkwargs) 160 | assert nci.peak_type == "Voigt" 161 | assert nci.lmfit_model.param_hints["center"]["value"] == 2435 162 | -------------------------------------------------------------------------------- /tests/deconvolution_models/test_fit_models.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import pytest 4 | 5 | from raman_fitting.models.fit_models import SpectrumFitModel 6 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader 7 | from raman_fitting.processing.post_processing import SpectrumProcessor 8 | 9 | 10 | @pytest.fixture 11 | def clean_spec(example_files) -> None: 12 | file = [i for i in example_files if "_pos4" in i.stem][0] 13 | specread = SpectrumReader(file) 14 | 15 | spectrum_processor = SpectrumProcessor(specread.spectrum) 16 | clean_spec_1st_order = spectrum_processor.clean_spectrum.spec_regions[ 17 | "savgol_filter_raw_region_first_order" 18 | ] 19 | clean_spec_1st_order.region_name = "first_order" 20 | return clean_spec_1st_order 21 | 22 | 23 | def test_fit_first_order(clean_spec, default_models): 24 | spectrum = clean_spec 25 | test_component = "center" 26 | 27 | for model_name, test_model in default_models["first_order"].items(): 28 | # with subTest(model_name=model_name, test_model=test_model): 29 | spec_fit = SpectrumFitModel( 30 | **{"spectrum": spectrum, "model": test_model, "region": "first_order"} 31 | ) 32 | spec_fit.run_fit() 33 | for component in test_model.lmfit_model.components: 34 | # with subTest(component=component): 35 | peak_component = f"{component.prefix}{test_component}" 36 | fit_value = spec_fit.fit_result.best_values[peak_component] 37 | init_value = spec_fit.fit_result.init_values[peak_component] 38 | assert math.isclose(fit_value, init_value, rel_tol=0.05) 39 | assert spec_fit.fit_result.success 40 | 
-------------------------------------------------------------------------------- /tests/deconvolution_models/test_peak_validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/deconvolution_models/test_peak_validation.py -------------------------------------------------------------------------------- /tests/delegating/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /tests/delegating/test_main_delegator.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from raman_fitting.config.path_settings import RunModes 4 | from raman_fitting.delegating.main_delegator import MainDelegator 5 | 6 | 7 | @pytest.fixture(scope="module") 8 | def delegator(): 9 | return MainDelegator(run_mode=RunModes.PYTEST) 10 | 11 | 12 | def test_initialize_models(delegator): 13 | assert "first_order" in delegator.lmfit_models 14 | assert "first_order" in delegator.selected_models 15 | with pytest.raises(KeyError): 16 | delegator.select_fitting_model("no_name", "no model") 17 | 18 | 19 | def test_delegator_index(delegator): 20 | assert delegator.index 21 | assert len(delegator.index.raman_files) == 5 22 | selection = delegator.select_samples_from_index() 23 | assert len(delegator.index.raman_files) == len(selection) 24 | 25 | 26 | def test_main_run(delegator): 27 | assert delegator.results 28 | -------------------------------------------------------------------------------- /tests/empty.toml: -------------------------------------------------------------------------------- 1 | # Empty config file 2 | -------------------------------------------------------------------------------- /tests/exporting/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri May 28 09:04:40 2021 5 | 6 | @author: zmg 7 | """ 8 | -------------------------------------------------------------------------------- /tests/exporting/test_plotting.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | #!/usr/bin/env python3 3 | # -*- coding: utf-8 -*- 4 | """ 5 | Created on Fri May 14 09:29:16 2021 6 | 7 | @author: zmg 8 | """ 9 | # flake8: noqa 10 | 11 | import pytest 12 | 13 | from raman_fitting.models.deconvolution.init_models import InitializeModels 14 | from raman_fitting.exports.plot_formatting import ( 15 | get_cmap_list, 16 | assign_colors_to_peaks, 17 | DEFAULT_COLOR, 18 | COLOR_BLACK, 19 | ) 20 | 21 | 22 | # class PeakModelAnnotation(unittest.TestCase): 23 | @pytest.fixture() 24 | def initialized_models(): 25 | return InitializeModels() 26 | 27 | 28 | def test_get_cmap_list(): 29 | assert get_cmap_list(0) == None 30 | _cmap = get_cmap_list(50) 31 | assert _cmap == [DEFAULT_COLOR] * 50 32 | _cmap = get_cmap_list(5) 33 | assert len(_cmap) >= 5 34 | _cmap1 = get_cmap_list(5, default_color=COLOR_BLACK) 35 | assert _cmap1 == [COLOR_BLACK] * 5 36 | 37 | 38 | def test_assign_colors_to_peaks(initialized_models): 39 | for order_type, model_collection in initialized_models.lmfit_models.items(): 40 | for model_name, model in model_collection.items(): 41 | annotated_models = assign_colors_to_peaks(model.lmfit_model.components) 42 | prefixes = 
set([i.prefix for i in model.lmfit_model.components]) 43 | assert prefixes == set(annotated_models.keys()) 44 | -------------------------------------------------------------------------------- /tests/indexing/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /tests/indexing/test_filename_parser.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from raman_fitting.imports.models import RamanFileInfo 4 | from raman_fitting.imports.samples.sample_id_helpers import ( 5 | overwrite_sample_id_from_mapper, 6 | overwrite_sample_group_id_from_parts, 7 | ) 8 | 9 | 10 | from raman_fitting.imports.samples.sample_id_helpers import ( 11 | parse_string_to_sample_id_and_position, 12 | ) 13 | 14 | example_parse_fixture = { 15 | "errEMP2_1.txt": ("errEMP2", 1), 16 | "errTS2_pos1.txt": ("errTS2", 1), 17 | "Si_spectrum01.txt": ("Si", 1), 18 | "testDW38C_pos1.txt": ("testDW38C", 1), 19 | "testDW38C_pos2.txt": ("testDW38C", 2), 20 | "testDW38C_pos3.txt": ("testDW38C", 3), 21 | "testDW38C_pos4.txt": ("testDW38C", 4), 22 | "DW_AB_CD-EF_GE_pos3": ("DW_AB_CD-EF_GE", 3), 23 | "DW99-pos3": ("DW99", 3), 24 | "Si": ("Si", 0), 25 | } 26 | 27 | 28 | # class TestFilenameParser(unittest.TestCase): 29 | result_attr = "parse_result" 30 | sample_id_name_mapper = {} 31 | sGrp_name_mapper = {} 32 | 33 | 34 | @pytest.fixture() 35 | def path_parsers(example_files): 36 | path_parsers_ = [] 37 | for fn in example_files: 38 | path_parsers_.append(RamanFileInfo(**{"file": fn})) 39 | return path_parsers_ 40 | 41 | 42 | def test_ramanfileinfo(path_parsers): 43 | assert all(isinstance(i, RamanFileInfo) for i in path_parsers) 44 | 45 | 46 | def test_sample_id_name_mapper(): 47 | for k, val in sample_id_name_mapper.items(): 48 | _mapval = overwrite_sample_id_from_mapper(k, sample_id_name_mapper) 49 | assert _mapval == val 50 | 51 | 52 | def test_overwrite_sample_id_from_mapper(): 53 | assert "TEST" == overwrite_sample_group_id_from_parts([], "TEST", sGrp_name_mapper) 54 | for k, val in sGrp_name_mapper.items(): 55 | empty_path_parts = RamanFileInfo(file=f"{k}/TEST.txt") 56 | assert val == overwrite_sample_group_id_from_parts( 57 | empty_path_parts.parts, "TEST", sGrp_name_mapper 58 | ) 59 | 60 | 61 | def test_parse_string_to_sample_id_and_position(): 62 | for file, _expected in example_parse_fixture.items(): 63 | assert parse_string_to_sample_id_and_position(file) == _expected 64 | -------------------------------------------------------------------------------- /tests/indexing/test_indexer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from raman_fitting.config.path_settings import ( 4 | get_run_mode_paths, 5 | RunModes, 6 | ) 7 | from raman_fitting.imports.files.file_indexer import ( 8 | RamanFileIndex, 9 | initialize_index_from_source_files, 10 | ) 11 | from raman_fitting.imports.models import RamanFileInfo 12 | 13 | run_mode = RunModes.PYTEST 14 | run_paths = get_run_mode_paths(run_mode) 15 | 16 | 17 | @pytest.fixture 18 | def index(example_files, internal_paths, tmp_raman_dir): 19 | pytest_fixtures_files = list(internal_paths.pytest_fixtures.rglob("*txt")) 20 | index_file = internal_paths.temp_index_file 21 | all_test_files = example_files + pytest_fixtures_files 22 | index = initialize_index_from_source_files( 23 | index_file=index_file, files=all_test_files, force_reindex=True 
24 | ) 25 | return index 26 | 27 | 28 | def test_index_make_examples(index, example_files): 29 | assert isinstance(index, RamanFileIndex) 30 | assert isinstance(index.raman_files[0], RamanFileInfo) 31 | assert len(index.dataset) > 1 32 | assert len(index.dataset) == len(example_files) 33 | 34 | 35 | # @unittest.skip("export_index not yet implemented") 36 | def test_load_index(index): 37 | index.index_file.exists() 38 | new_index = RamanFileIndex(index_file=index.index_file, force_reindex=False) 39 | assert isinstance(new_index, RamanFileIndex) 40 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_base_peak.py: -------------------------------------------------------------------------------- 1 | from raman_fitting.models.deconvolution.base_peak import BasePeak 2 | 3 | 4 | def test_initialize_base_peaks( 5 | default_definitions, default_models_first_order, default_models_second_order 6 | ): 7 | peaks = {} 8 | 9 | peak_items = { 10 | **default_definitions["first_order"]["peaks"], 11 | **default_definitions["second_order"]["peaks"], 12 | }.items() 13 | for k, v in peak_items: 14 | peaks.update({k: BasePeak(**v)}) 15 | 16 | peak_d = BasePeak(**default_definitions["first_order"]["peaks"]["D"]) 17 | assert ( 18 | peak_d.peak_name 19 | == default_definitions["first_order"]["peaks"]["D"]["peak_name"] 20 | ) 21 | assert ( 22 | peak_d.peak_type 23 | == default_definitions["first_order"]["peaks"]["D"]["peak_type"] 24 | ) 25 | assert ( 26 | peak_d.lmfit_model.components[0].prefix 27 | == default_definitions["first_order"]["peaks"]["D"]["peak_name"] + "_" 28 | ) 29 | assert ( 30 | peak_d.param_hints["center"].value 31 | == default_definitions["first_order"]["peaks"]["D"]["param_hints"]["center"][ 32 | "value" 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /tests/models/test_calculate_params.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from raman_fitting.models.post_deconvolution.calculate_params import ratio_funcs 4 | 5 | 6 | result_first = {"D_center": 1, "G_center": 2, "D1D1_center": 3} 7 | first_peaks = "G+D+D2+D3+D4+D5" 8 | result_second = ( 9 | {"D4D4 +D1D1+GD1+D2D2"}, 10 | {"D_center": 1, "G_center": 2, "D1D1_center": 3}, 11 | ) 12 | var_name = "peak" 13 | 14 | 15 | @pytest.fixture 16 | def list_of_ratio_funcs(): 17 | return list(ratio_funcs) 18 | 19 | 20 | @pytest.fixture 21 | def results_first(default_models_first_order): 22 | return { 23 | k: val.get("value") 24 | for k, val in default_models_first_order[ 25 | "5peaks" 26 | ].lmfit_model.param_hints.items() 27 | if "value" in val 28 | } 29 | 30 | 31 | @pytest.fixture 32 | def results_second(default_models_second_order): 33 | return { 34 | k: val.get("value") 35 | for k, val in default_models_second_order[ 36 | "2nd_4peaks" 37 | ].lmfit_model.param_hints.items() 38 | if "value" in val 39 | } 40 | 41 | 42 | def test_calculate_params_keyerror(list_of_ratio_funcs, results_first): 43 | var_name = "no_var" 44 | with pytest.raises(KeyError): 45 | list_of_ratio_funcs[0](results_first, var_name) 46 | 47 | 48 | def test_calculate_params_from_results( 49 | results_first, 
results_second, list_of_ratio_funcs 50 | ): 51 | combined_results = {**results_first, **results_second} 52 | 53 | prefix = "" 54 | var_name = "center" 55 | 56 | results = {} 57 | for ratio_func in list_of_ratio_funcs: 58 | label, ratio = ratio_func(combined_results, var_name, prefix=prefix) 59 | 60 | func = ratio_func.__name__ 61 | results[func] = {"label": label, "ratio": ratio} 62 | assert results 63 | assert results["ratio_d_to_g"]["ratio"] < 1 64 | assert results["ratio_d_to_g"]["label"] == "D/G" 65 | for k, val in results.items(): 66 | assert val["label"] 67 | assert val["ratio"] > 0 68 | -------------------------------------------------------------------------------- /tests/models/test_fit_models.py: -------------------------------------------------------------------------------- 1 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader 2 | from raman_fitting.models.fit_models import SpectrumFitModel 3 | from raman_fitting.processing.post_processing import SpectrumProcessor 4 | 5 | 6 | def test_fit_model(example_files, default_models_first_order): 7 | file = [i for i in example_files if "_pos4" in i.stem][0] 8 | 9 | specread = SpectrumReader(file) 10 | 11 | spectrum_processor = SpectrumProcessor(specread.spectrum) 12 | clean_spec_1st_order = spectrum_processor.clean_spectrum.spec_regions[ 13 | "savgol_filter_raw_region_first_order" 14 | ] 15 | clean_spec_1st_order.region_name = "first_order" 16 | 17 | model_2peaks = default_models_first_order["2peaks"] 18 | spec_fit = SpectrumFitModel( 19 | spectrum=clean_spec_1st_order, 20 | model=model_2peaks, 21 | region=clean_spec_1st_order.region_name, 22 | ) 23 | spec_fit.run_fit() 24 | assert spec_fit.fit_result.success 25 | assert spec_fit.fit_result.best_values 26 | assert spec_fit.param_results["ratios"]["center"]["ratio_d_to_g"]["ratio"] < 1 27 | assert spec_fit.param_results["ratios"]["center"]["ratio_la_d_to_g"]["ratio"] < 10 28 | d_amp_ = spec_fit.fit_result.best_values["D_amplitude"] 29 | g_amp_ = spec_fit.fit_result.best_values["G_amplitude"] 30 | dg_ratio = d_amp_ / g_amp_ 31 | assert ( 32 | spec_fit.param_results["ratios"]["amplitude"]["ratio_d_to_g"]["ratio"] 33 | == dg_ratio 34 | ) 35 | -------------------------------------------------------------------------------- /tests/processing/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri May 28 09:04:45 2021 5 | 6 | @author: zmg 7 | """ 8 | -------------------------------------------------------------------------------- /tests/processing/test_cleaner.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | from raman_fitting.processing.despike import SpectrumDespiker 5 | 6 | 7 | int_arrays = ( 8 | np.array([1, 2, 3, 4, 5]), 9 | np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 10 | np.array([2, 2, 2, 2, 2, 2, 30, 20, 2, 2, 2, 2, 2, 2]) 11 | ) 12 | 13 | @pytest.mark.parametrize('array', int_arrays) 14 | def test_despiker(array): 15 | despiker = SpectrumDespiker.model_construct() 16 | 17 | desp_int = despiker.process_intensity(array) 18 | assert len(desp_int) == len(array) 19 | -------------------------------------------------------------------------------- /tests/processing/test_spectrum_constructor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader 4 | 
from raman_fitting.models.deconvolution.spectrum_regions import RegionNames 5 | 6 | 7 | def test_spectrum_data_loader_empty(): 8 | with pytest.raises(ValueError): 9 | SpectrumReader("empty.txt") 10 | 11 | 12 | def test_spectrum_data_loader_file(example_files): 13 | for file in example_files: 14 | sprdr = SpectrumReader(file) 15 | assert len(sprdr.spectrum.intensity) == 1600 16 | assert len(sprdr.spectrum.ramanshift) == 1600 17 | assert sprdr.spectrum.source == file 18 | assert sprdr.spectrum.region_name == RegionNames.full 19 | -------------------------------------------------------------------------------- /tests/test_fixtures/empty-lines_1.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | end 15 | -------------------------------------------------------------------------------- /todos.md: -------------------------------------------------------------------------------- 1 | # List of IDEAs for the raman fitting code 2 | ```py 3 | # IDEA change version definition 4 | 5 | # IDEA list: 6 | # improved logger, each module needs a getlogger(name) 7 | # IDEA future daemonize the fitting process for using the package and dropping files in the datafiles folder 8 | # IDEA add docs with Sphinx, readthedocs 9 | # IDEA improve AsyncIO into main delegator processes 10 | # IDEA fix plotting because of DeprecationWarning in savefig 11 | # IDEA add database for spectrum data storage 12 | # IDEA future GUI webinterface 13 | 14 | # IDEA improve fitting loop so that starting parameters from modelX and modelX+Si are shared, faster... 15 | ``` 16 | --------------------------------------------------------------------------------
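Taken together, the modules above support the following minimal workflow, assembled from the calls exercised in the test suite (a sketch, not a documented API guarantee; the input filename and model name are placeholders):

    from raman_fitting.config import settings
    from raman_fitting.imports.spectrumdata_parser import SpectrumReader
    from raman_fitting.processing.post_processing import SpectrumProcessor
    from raman_fitting.models.fit_models import SpectrumFitModel

    # read a two-column text file (ramanshift, intensity) and clean it
    spectrum = SpectrumReader("testDW38C_pos4.txt").spectrum
    clean = SpectrumProcessor(spectrum).clean_spectrum
    first_order = clean.spec_regions["savgol_filter_raw_region_first_order"]
    first_order.region_name = "first_order"

    # fit one of the default first-order models and inspect the results
    model = settings.default_models["first_order"]["2peaks"]
    spec_fit = SpectrumFitModel(spectrum=first_order, model=model, region="first_order")
    spec_fit.run_fit()
    print(spec_fit.fit_result.best_values)
    print(spec_fit.param_results["ratios"]["center"]["ratio_d_to_g"])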