├── .build.info
├── .flake8
├── .gitattributes
├── .github
│   └── workflows
│       ├── build-test-codecov.yml
│       ├── github-actions-demo.yml
│       ├── test-release-candidate.yaml
│       └── upload-to-testpypi.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .secrets.baseline
├── Dockerfile
├── LICENSE
├── README.md
├── examples
│   ├── fitting_Model_1st_6peaks+Si.png
│   └── raw_data.png
├── pyproject.toml
├── src
│   └── raman_fitting
│       ├── __init__.py
│       ├── config
│       │   ├── __init__.py
│       │   ├── base_settings.py
│       │   ├── default_models
│       │   │   ├── __init__.py
│       │   │   ├── first_order.toml
│       │   │   ├── normalization.toml
│       │   │   ├── second_order.toml
│       │   │   └── spectrum_regions.toml
│       │   ├── filepath_helper.py
│       │   ├── logging_config.py
│       │   └── path_settings.py
│       ├── delegating
│       │   ├── __init__.py
│       │   ├── main_delegator.py
│       │   ├── models.py
│       │   ├── pre_processing.py
│       │   ├── run_fit_multi.py
│       │   └── run_fit_spectrum.py
│       ├── example_fixtures
│       │   ├── Si_spectrum01.txt
│       │   ├── __init__.py
│       │   ├── testDW38C_pos1.txt
│       │   ├── testDW38C_pos2.txt
│       │   ├── testDW38C_pos3.txt
│       │   └── testDW38C_pos4.txt
│       ├── exports
│       │   ├── __init__.py
│       │   ├── exporter.py
│       │   ├── file_table.py
│       │   ├── plot_formatting.py
│       │   ├── plotting_fit_results.py
│       │   └── plotting_raw_data.py
│       ├── imports
│       │   ├── __init__.py
│       │   ├── collector.py
│       │   ├── files
│       │   │   ├── file_finder.py
│       │   │   ├── file_indexer.py
│       │   │   ├── index_funcs.py
│       │   │   ├── index_helpers.py
│       │   │   ├── metadata.py
│       │   │   ├── utils.py
│       │   │   └── validators.py
│       │   ├── models.py
│       │   ├── samples
│       │   │   ├── models.py
│       │   │   └── sample_id_helpers.py
│       │   ├── spectrum
│       │   │   ├── __init__.py
│       │   │   ├── datafile_parsers.py
│       │   │   ├── spectra_collection.py
│       │   │   └── validators.py
│       │   └── spectrumdata_parser.py
│       ├── interfaces
│       │   ├── __init__.py
│       │   ├── argparse_cli.py
│       │   ├── typer_cli.py
│       │   └── utils.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── deconvolution
│       │   │   ├── __init__.py
│       │   │   ├── base_model.py
│       │   │   ├── base_peak.py
│       │   │   ├── init_models.py
│       │   │   ├── lmfit_parameter.py
│       │   │   └── spectrum_regions.py
│       │   ├── fit_models.py
│       │   ├── post_deconvolution
│       │   │   ├── __init__.py
│       │   │   ├── calculate_params.py
│       │   │   └── parameter_ratio_funcs.py
│       │   ├── spectrum.py
│       │   └── splitter.py
│       ├── processing
│       │   ├── __init__.py
│       │   ├── baseline_subtraction.py
│       │   ├── despike.py
│       │   ├── filter.py
│       │   ├── normalization.py
│       │   └── post_processing.py
│       ├── types.py
│       └── utils
│           ├── __init__.py
│           ├── decorators.py
│           ├── file_reader.py
│           └── string_operations.py
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── deconvolution_models
│   │   ├── __init__.py
│   │   ├── test_base_model.py
│   │   ├── test_base_peaks.py
│   │   ├── test_fit_models.py
│   │   └── test_peak_validation.py
│   ├── delegating
│   │   ├── __init__.py
│   │   └── test_main_delegator.py
│   ├── empty.toml
│   ├── exporting
│   │   ├── __init__.py
│   │   └── test_plotting.py
│   ├── indexing
│   │   ├── __init__.py
│   │   ├── test_filename_parser.py
│   │   └── test_indexer.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── test_base_peak.py
│   │   ├── test_calculate_params.py
│   │   └── test_fit_models.py
│   ├── processing
│   │   ├── __init__.py
│   │   ├── test_cleaner.py
│   │   └── test_spectrum_constructor.py
│   └── test_fixtures
│       ├── empty-lines_1.txt
│       └── wrong-values-in-lines_pos1.txt
└── todos.md
/.build.info:
--------------------------------------------------------------------------------
1 | # Build release overview
2 | # Reference:
3 | # https://github.com/pauliacomi/pyGAPS/blob/c19bf45a896ff787acf8a29f77652a90236dd6c5/.build.info
4 | # This file contains details about how to
5 | # perform a release of this package
6 |
7 | # Create new branch for release (we use git flow)
8 | # Respect semantic versioning for the releases
9 | git flow release start x.y.z
10 |
11 | # Ensure all tests are passing
12 | # or use CI provider to run them automatically
13 | pytest --cov --cov-report xml:coverage.xml
14 |
15 | # Run bumpversion to change any version strings
16 | # scattered throughout the source code
17 | bumpversion major/minor/patch
18 | # !!! Check if pre-commit hooks are enabled
19 | # bump2version may fail the commit if pre-commit hooks fail...
20 | # bump2version patch --no-commit
21 |
22 | # Finish branch
23 | git flow release finish x.y.z
24 |
25 | # Push, including tags
26 | git push --tags
27 |
28 | # Check CI if project has been successfully pushed to pypi
29 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # Rule definitions: http://flake8.pycqa.org/en/latest/user/error-codes.html
3 | # D203: 1 blank line required before class docstring
4 | # W503: line break before binary operator
5 | exclude =
6 | venv*,
7 | .venv,
8 | __pycache__,
9 | node_modules,
10 | bower_components,
11 | migrations,
12 | .tox,
13 | .eggs,
14 | build,
15 | dist
16 |
17 | extend-exclude=
18 | *_Conflict.py
19 | *_old.py
20 | *_dev_*.py
21 |
22 |
23 | ignore = D203,W503
24 | max-complexity = 9
25 | max-line-length = 140
26 | extend-ignore =
27 | E115, E121, E123, E126, E128,
28 |     E202, E203, E125,
29 |     E222, E226, E231, E241, E251, E262,
30 | E303, E306,
31 | F523, E261, E265, E266,
32 | E301, E302, E305,
33 | F401, E402, F402, F403,
34 | E501, F502, F524, F541,
35 | F601, W605,
36 |     E712, E713, E722, E741,
37 | F811, F841,
38 | C901
39 |
40 | per-file-ignores =
41 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | .git_archival.txt export-subst
2 |
--------------------------------------------------------------------------------
/.github/workflows/build-test-codecov.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Lint Build Test Codecov
5 |
6 | on:
7 | push:
8 | branches: [ main ]
9 | pull_request:
10 | branches: [ main, pre-release ]
11 |
12 | jobs:
13 |
14 | build:
15 | runs-on: ${{ matrix.os }}
16 | strategy:
17 | fail-fast: true
18 | matrix:
19 | os: [ ubuntu-latest, macos-latest, windows-latest ]
20 | python-version: [3.11]
21 |
22 |
23 | steps:
24 | - uses: actions/checkout@v3
25 |
26 | - name: Set up Python ${{ matrix.python-version }}
27 | uses: actions/setup-python@v4
28 | with:
29 | python-version: ${{ matrix.python-version }}
30 |
31 | - name: Install dependencies
32 | run: |
33 | python3 -m pip install -U pip
34 | python3 -m pip install -U build
35 | python3 -m pip install flake8
36 |
37 | - name: Lint with flake8
38 | run: |
39 | # stop the build if there are Python syntax errors or undefined names
40 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
41 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
42 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=140 --statistics
43 |
44 | - name: Build wheels
45 | run: |
46 | python3 -m build
47 | # install the package in editable mode for the coverage report
48 | python3 -m pip install -e .["pytest"]
49 |
50 | - name: Generate coverage report
51 | run: |
52 | pytest --cov --cov-report=xml --cov-report=term-missing
53 | - name: Upload Coverage to Codecov
54 | uses: codecov/codecov-action@v1
55 |
56 |       - name: raman_fitting run examples
57 |         run: |
60 | raman_fitting run examples
61 |
--------------------------------------------------------------------------------
/.github/workflows/github-actions-demo.yml:
--------------------------------------------------------------------------------
1 | name: GitHub Actions Demo
2 | on: [push]
3 | jobs:
4 | Explore-GitHub-Actions:
5 | runs-on: ubuntu-latest
6 | steps:
7 | - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
8 | - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
9 | - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
10 | - name: Check out repository code
11 | uses: actions/checkout@v3
12 | - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner by $GITHUB_ACTOR or ${{ github.actor }}."
13 | - run: echo "🖥️ The workflow is now ready to test your code on the runner."
14 | - name: List files in the repository
15 | run: |
16 | ls ${{ github.workspace }}
17 | - run: echo "🍏 This job's status is ${{ job.status }}."
18 |
--------------------------------------------------------------------------------
/.github/workflows/test-release-candidate.yaml:
--------------------------------------------------------------------------------
1 | # Tests for releases and release candidates
2 | #
3 | # Runs on every tag creation, and all pushes and PRs to release branches
4 | # named "v1.2.x", etc.
5 | #
6 | # This workflow is more extensive than the regular test workflow.
7 | # - Tests are executed on more Python versions
8 | # - Tests are run on more operating systems
9 | # - N.B. There is no pip cache here to ensure runs are always against the
10 | # very latest versions of dependencies, even if this workflow ran recently.
11 | #
12 | # In addition, the package is built as a wheel on each OS/Python job, and these
13 | # are stored as artifacts to use for your distribution process. There is an
14 | # extra job (disabled by default) which can be enabled to push to Test PyPI.
15 |
16 | # Reference:
17 | # https://github.com/scottclowe/python-template-repo
18 |
19 | name: release candidate tests
20 |
21 | on:
22 | workflow_dispatch:
23 |
24 | push:
25 | branches:
26 | # Release branches.
27 | # Examples: "v1", "v3.0", "v1.2.x", "1.5.0", "1.2rc0"
28 | # Expected usage is (for example) a branch named "v1.2.x" which contains
29 | # the latest release in the 1.2 series.
30 | - 'v[0-9]+'
31 | - 'v?[0-9]+.[0-9x]+'
32 | - 'v?[0-9]+.[0-9]+.[0-9x]+'
33 | - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+'
34 | - 'v?[0-9]+.[0-9x]+rc[0-9]*'
35 | tags:
36 | # Run whenever any tag is created
37 | - '**'
38 | pull_request:
39 | branches:
40 | # Release branches
41 | - 'v[0-9]+'
42 | - 'v?[0-9]+.[0-9x]+'
43 | - 'v?[0-9]+.[0-9]+.[0-9x]+'
44 | - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+'
45 | - 'v?[0-9]+.[0-9x]+rc[0-9]*'
46 | release:
47 | # Run on a new release
48 | types: [created, edited, published]
49 |
50 | jobs:
51 | test-build:
52 | runs-on: ${{ matrix.os }}
53 | strategy:
54 | matrix:
55 | os: [ubuntu-latest, windows-latest, macos-latest]
56 | python-version: ["3.11"]
57 | env:
58 | OS: ${{ matrix.os }}
59 | PYTHON: ${{ matrix.python-version }}
60 |
61 | steps:
62 | - uses: actions/checkout@v3
63 |
64 | - name: Set up Python ${{ matrix.python-version }}
65 | uses: actions/setup-python@v4
66 | with:
67 | python-version: ${{ matrix.python-version }}
68 |
69 | - name: Install dependencies
70 | run: |
71 | python -m pip install --upgrade pip
72 | python -m pip install flake8
73 | python -m pip install -U build
74 | python -m pip install --editable .["pytest"]
75 |
76 | - name: Sanity check with flake8
77 | run: |
78 | # stop the build if there are Python syntax errors or undefined names
79 | python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
80 | # exit-zero treats all errors as warnings
81 | python -m flake8 . --count --exit-zero --statistics
82 |
83 | - name: Debug environment
84 | run: python -m pip freeze
85 |
86 | - name: Test with pytest
87 | run: |
88 | pytest --cov --cov-report=xml --cov-report=term-missing
89 | # python -m pytest --cov=raman_fitting --cov-report=term-missing --cov-report=xml --cov-config .coveragerc --junitxml=testresults.xml
90 |
91 | - name: Upload coverage to Codecov
92 | if: false
93 | uses: codecov/codecov-action@v1
94 | with:
95 | flags: unittests
96 | env_vars: OS,PYTHON
97 | name: Python ${{ matrix.python-version }} on ${{ runner.os }}
98 |
99 | - name: Build wheels
100 | run: |
101 | python3 -m pip install -U build
102 | python3 -m build
103 |
104 | - name: Store wheel artifacts
105 | uses: actions/upload-artifact@v2
106 | with:
107 | name: wheel-${{ runner.os }}
108 | path: dist/*
109 |
110 |
111 | publish:
112 | # Disabled by default
113 | if: startsWith(github.ref, 'refs/tags/')
114 | needs: test-build
115 |
116 | runs-on: ubuntu-latest
117 | steps:
118 | - uses: actions/checkout@v3
119 |
120 | - name: Download wheel artifacts
121 | uses: actions/download-artifact@v2
122 | with:
123 | name: wheel-${{ runner.os }}
124 | path: dist/
125 |
126 | - name: Store wheel artifacts
127 | uses: actions/upload-artifact@v2
128 | with:
129 | name: wheel-${{ runner.os }}
130 | path: dist/*
131 |
132 | - name: Publish package to Test PyPI
133 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
134 | uses: pypa/gh-action-pypi-publish@release/v1
135 | with:
136 | user: __token__
137 | password: ${{ secrets.TWINE_TEST_TOKEN }}
138 | repository_url: https://test.pypi.org/legacy/
139 |
140 | - name: Publish package to PyPI 📦
141 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
142 | uses: pypa/gh-action-pypi-publish@release/v1
143 | with:
144 | user: __token__
145 | password: ${{ secrets.PYPI_API_TOKEN }}
146 |
--------------------------------------------------------------------------------
/.github/workflows/upload-to-testpypi.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: Publish to TestPyPI and PyPI
4 |
5 | # Controls when the action will run.
6 | on:
7 | # Triggers the workflow on push to the master branch
8 | #push:
9 | # branches: [ master ]
10 |
11 | # Allows you to run this workflow manually from the Actions tab
12 | workflow_dispatch:
13 |
14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
15 | jobs:
16 | # This workflow contains a single job called "build"
17 | build:
18 | # The type of runner that the job will run on
19 | name: Build Python 🐍 distributions to 📦
20 | #if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
21 | runs-on: ubuntu-latest
22 |
23 | # Steps represent a sequence of tasks that will be executed as part of the job
24 | steps:
25 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
26 | - uses: actions/checkout@v3
27 | # with:
28 | # fetch-depth: 0
29 | - name: Fetch all history for all tags and branches
30 | run: git fetch --prune --unshallow
31 |
32 | - name: Set up python 3.11
33 | uses: actions/setup-python@v4
34 | with:
35 | python-version: 3.11
36 |
37 | # Installs and upgrades pip, installs other dependencies and installs the package from pyproject.toml
38 | - name: Installs and upgrades pip and installs other dependencies
39 | run: |
40 | # Upgrade pip
41 | python3 -m pip install --upgrade pip
42 | # Install build deps
43 | python3 -m pip install -U build
44 |           # If requirements.txt exists, install from it
45 |           if [ -f requirements.txt ]; then python3 -m pip install -r requirements.txt; fi
46 |
47 |     - name: Build the package
48 |       run: |
49 |         # Build the sdist and wheel with build
50 | python3 -m build
51 |
52 | - name: Store wheel artifacts
53 | uses: actions/upload-artifact@v2
54 | with:
55 |         name: wheel-${{ runner.os }}
56 | path: dist/*
57 |
58 | publish:
59 |
60 | name: Publish 📦 to PyPI and TestPyPI
61 | #if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
62 | needs: build
63 | runs-on: ubuntu-latest
64 | # Steps represent a sequence of tasks that will be executed as part of the job
65 | steps:
66 | - uses: actions/checkout@v3
67 |
68 | - name: Download wheel artifacts
69 | uses: actions/download-artifact@v2
70 | with:
71 |         name: wheel-${{ runner.os }}
72 | path: dist/
73 |
74 | - name: Store aggregated wheel artifacts
75 | uses: actions/upload-artifact@v2
76 | with:
77 | name: wheels
78 | path: dist/*
79 |
80 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
81 |     - name: Check distributions with twine
82 |       run: |
83 |         python3 -m pip install -U twine
84 |         # Run twine check in advance, even though gh-action-pypi-publish also does this
85 |         twine check dist/*
86 | # Upload to TestPyPI
87 | - name: Publish package to TestPyPI 📦
88 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
89 | uses: pypa/gh-action-pypi-publish@release/v1
90 | with:
91 | user: __token__
92 | password: ${{ secrets.TWINE_TEST_TOKEN }}
93 | repository_url: https://test.pypi.org/legacy/
94 |
95 | - name: Publish package to PyPI 📦
96 | if: startsWith(github.ref, 'refs/tags/v')
97 | uses: pypa/gh-action-pypi-publish@v1.5.1
98 | with:
99 | user: __token__
100 | password: ${{ secrets.PYPI_API_TOKEN }}
101 | verbose: true
102 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | testresults.xml
47 |
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | env*/
91 | venv/
92 | ENV/
93 | env.bak/
94 | venv.bak/
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # VS code project settings
101 | .vscode
102 |
103 | # Rope project settings
104 | .ropeproject
105 |
106 | # mkdocs documentation
107 | /site
108 |
109 | # mypy
110 | .mypy_cache/
111 |
112 | # pycharm
113 | .todo/
114 |
115 | # datafiles and results
116 | **/results/*
117 | tests/test_results/**
118 |
119 | /*.csv
120 | /*.zip
121 | #/*.txt
122 | /*.xlsx
123 |
124 | # local configuration settings
125 | local_config.py
126 |
127 | # all logs
128 | logs/
129 |
130 | # trained models (will be created in CI)
131 | /*.pkl
132 |
133 | # extra tox files
134 | tox.ini.bak
135 | tox-generated.ini
136 |
137 | # Generated by setuptools-scm
138 | */*/_version.py
139 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | # Temporary disabling hooks: SKIP=flake8 git commit -m "foo"
4 | repos:
5 | - repo: https://github.com/pre-commit/pre-commit-hooks
6 | rev: v4.5.0
7 | hooks:
8 | - id: check-added-large-files
9 | name: Check for files larger than 5 MB
10 | args: [ "--maxkb=5120" ]
11 | - id: end-of-file-fixer
12 | name: Check for a blank line at the end of scripts (auto-fixes)
13 | exclude: '\.Rd'
14 | - id: trailing-whitespace
15 | name: Check for trailing whitespaces (auto-fixes)
16 | - repo: https://github.com/astral-sh/ruff-pre-commit
17 | # Ruff version.
18 | rev: v0.3.2
19 | hooks:
20 | # Run the linter.
21 | - id: ruff
22 | args: [ --fix ]
23 | # Run the formatter.
24 | - id: ruff-format
25 | - repo: https://github.com/gitleaks/gitleaks
26 | rev: v8.18.2
27 | hooks:
28 | - id: gitleaks
29 |
--------------------------------------------------------------------------------
/.secrets.baseline:
--------------------------------------------------------------------------------
1 | {
2 | "version": "1.0.3",
3 | "plugins_used": [
4 | {
5 | "name": "ArtifactoryDetector"
6 | },
7 | {
8 | "name": "AWSKeyDetector"
9 | },
10 | {
11 | "name": "AzureStorageKeyDetector"
12 | },
13 | {
14 | "name": "Base64HighEntropyString",
15 | "limit": 4.5
16 | },
17 | {
18 | "name": "BasicAuthDetector"
19 | },
20 | {
21 | "name": "CloudantDetector"
22 | },
23 | {
24 | "name": "HexHighEntropyString",
25 | "limit": 3.0
26 | },
27 | {
28 | "name": "IbmCloudIamDetector"
29 | },
30 | {
31 | "name": "IbmCosHmacDetector"
32 | },
33 | {
34 | "name": "JwtTokenDetector"
35 | },
36 | {
37 | "name": "KeywordDetector",
38 | "keyword_exclude": ""
39 | },
40 | {
41 | "name": "MailchimpDetector"
42 | },
43 | {
44 | "name": "NpmDetector"
45 | },
46 | {
47 | "name": "PrivateKeyDetector"
48 | },
49 | {
50 | "name": "SlackDetector"
51 | },
52 | {
53 | "name": "SoftlayerDetector"
54 | },
55 | {
56 | "name": "SquareOAuthDetector"
57 | },
58 | {
59 | "name": "StripeDetector"
60 | },
61 | {
62 | "name": "TwilioKeyDetector"
63 | }
64 | ],
65 | "filters_used": [
66 | {
67 | "path": "detect_secrets.filters.allowlist.is_line_allowlisted"
68 | },
69 | {
70 | "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
71 | "min_level": 2
72 | },
73 | {
74 | "path": "detect_secrets.filters.heuristic.is_indirect_reference"
75 | },
76 | {
77 | "path": "detect_secrets.filters.heuristic.is_likely_id_string"
78 | },
79 | {
80 | "path": "detect_secrets.filters.heuristic.is_potential_uuid"
81 | },
82 | {
83 | "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
84 | },
85 | {
86 | "path": "detect_secrets.filters.heuristic.is_sequential_string"
87 | },
88 | {
89 | "path": "detect_secrets.filters.heuristic.is_templated_secret"
90 | }
91 | ],
92 | "results": {},
93 | "generated_at": "2021-06-14T10:43:14Z"
94 | }
95 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # set base image (host OS)
2 | FROM python:3.11
3 | 
4 | # create an unprivileged user (the python:3.11 image is Debian-based, so use groupadd/useradd)
5 | RUN groupadd --system nonroot \
6 |     && useradd --system --gid nonroot --create-home nonroot
7 | 
8 | # set the working directory in the container
9 | WORKDIR /code
10 | 
11 | # copy the package source into the working directory
12 | COPY ./raman-fitting ./raman-fitting
13 | 
14 | # copy setup.cfg to work dir
15 | # COPY setup.cfg .
16 | # COPY setup.py .
17 | # install package test, maybe not possible because only src
18 | # RUN pip install -e ./
19 | 
20 | # install build tooling, build the package and install it in editable mode
21 | # (dependencies are resolved from pyproject.toml, so no requirements.txt is needed)
22 | RUN pip install --upgrade pip build
23 | RUN python -m build ./raman-fitting
24 | RUN pip install -e ./raman-fitting
25 | 
26 | # copy the content of the local src directory to the working directory
27 | #COPY src/ .
28 | 
29 | # run as the unprivileged user from here on
30 | USER nonroot
31 | 
32 | # command to run on container start (exec form needs one argument per list item)
33 | CMD [ "raman_fitting", "run", "examples" ]
34 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 David Wallace
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/psf/black)
2 | [](https://github.com/MyPyDavid/raman-fitting/actions/workflows/build-test-codecov.yml)
3 | [](https://codecov.io/gh/MyPyDavid/raman-fitting)
4 | [](https://github.com/MyPyDavid/raman-fitting/actions/workflows/upload-to-testpypi.yml)
5 |
6 |
7 |
8 |
9 |
10 |
11 | # raman-fitting
12 | A Python framework that performs deconvolution of the typical regions of interest in the Raman spectra of carbonaceous materials.
13 | The deconvolutions are done with models composed of collections of lineshapes or peaks that are typically assigned to these spectra in the scientific literature.
14 | 
15 | In batch processing mode this package will index the Raman data files in a chosen folder.
16 | First, it tries to extract a sample ID and position number from each filename and creates an index of the files in a dataframe. A preprocessing, fitting and exporting loop then runs over this index.
17 | There are several models, each with a different combination of typical peaks, that are used for fitting. Each individual peak is defined in the config/default_models folder, with added literature references in its docstring. There, the individual peak parameter settings can easily be adjusted for initial values, limits and shape (e.g. Lorentzian, Gaussian or Voigt), or be fixed at certain initial values.
18 | Export is done with plots and Excel files of the spectral data and fitting parameters for further analysis.
19 |
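As a minimal sketch (assuming the package is installed; the `settings` object and its `default_models` attribute follow `src/raman_fitting/config` in this repository), the default models per spectral region can be listed from Python:

``` python
# Minimal sketch: list the default fitting models defined in the TOML files.
# `settings` is created in raman_fitting/config/__init__.py on import.
from raman_fitting.config import settings

for region_name, region_models in settings.default_models.items():
    # region_models maps a model name (e.g. "4peaks") to its lmfit-based model
    print(region_name, "->", ", ".join(region_models))
```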
20 |
21 | ### Example plots
22 |
23 | https://github.com/MyPyDavid/raman-fitting/wiki
24 |
25 |
26 | ### Set up virtual environment and install the package
27 |
28 | A release is now available on PyPI; installation can be done with the following commands in a terminal.
29 | ``` bash
30 | # Setting up and activating a virtual environment
31 | python -m venv env # python 3.11 is recommended
32 | source env/bin/activate
33 |
34 | # Installation from PyPI
35 | python -m pip install raman_fitting
36 | ```
37 |
38 | #### From source installation
39 |
40 | The following shows how to install the package from this source repository.
41 | Download or clone this repository into a folder of your choice.
42 | ``` bash
43 | git clone https://github.com/MyPyDavid/raman-fitting.git
44 |
45 | # set up and activate venv ...
46 |
47 | # regular install
48 | python -m pip install raman-fitting/
49 |
50 | # editable/develop mode
51 | python -m pip install -e raman-fitting/
52 | ```
53 |
54 | ### Usage
55 |
56 | #### Post installation test run
57 |
58 | In order to test the package after installation, please try the following command in a terminal CLI.
59 | ``` bash
60 | raman_fitting run examples
61 | ```
62 | or these commands in the Python interpreter or in a Jupyter Notebook.
63 | ``` python
64 | import raman_fitting
65 | raman_fitting.make_examples()
66 | ```
67 | This test run should yield the resulting plots and files in the following folder, where home is the local user's home directory (depending on the OS).
68 | ``` bash
69 | # Linux
70 | home/.raman_fitting/example_results
71 |
72 | # For Other OSs, log messages will show:
73 | # Results saved in ...
74 |
75 | ```
76 |
77 | #### Fitting your own datafiles
78 | Place your data files in the default location or change this default setting in the config.
79 | ``` bash
80 | home/.raman_fitting/datafiles
81 | ```
82 | The following command will attempt the indexing, preprocessing, fitting and plotting on all the files found in this folder.
83 | ``` bash
84 | # The default run mode is "normal", which runs over all the files found in the index
85 | raman_fitting
86 | 
87 | # If you add a lot of files, check that the index is properly constructed
88 | # before fitting them.
89 | raman_fitting make index
90 |
91 | # Location of index
92 | home/.raman_fitting/datafiles/results/raman_fitting_index.csv
93 | ```
94 |
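A minimal sketch of driving the same run from Python (assuming the installed package; `MainDelegator` and `RunModes` are the names used in `src/raman_fitting/delegating/main_delegator.py` and `src/raman_fitting/config/path_settings.py`):
``` python
# Minimal sketch: index, fit and export programmatically.
# MainDelegator performs the whole loop in its __post_init__,
# so constructing it runs indexing, fitting and exporting.
from raman_fitting.config.path_settings import RunModes
from raman_fitting.delegating.main_delegator import MainDelegator

delegator = MainDelegator(run_mode=RunModes.NORMAL)
```
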
95 | #### Datafiles
96 |
97 | The Raman data files should be .txt files with two columns of data values.
98 | The first column should contain the Raman shift values and the second one the measured intensity.
99 | Filenames are parsed into a sampleID and position, in order to take the mean of the measured intensity
100 | over several positions on the same sample.
101 |
102 | An example of filename formatting and parsing result:
103 | ``` python
104 | samplename1_pos1.txt => sampleID = 'samplename1', position = 1
105 | sample2-100_3.txt => sampleID = 'sample2-100', position = 3
106 | ```
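
A minimal sketch of writing and reading such a datafile (using numpy; the filename below is only a hypothetical example of the naming scheme above):
``` python
# Minimal sketch: create and read back a two-column Raman datafile.
import numpy as np

ramanshift = np.linspace(200, 3600, 1000)            # Raman shift values (cm-1), first column
intensity = np.random.default_rng(42).random(1000)   # measured intensity, second column
np.savetxt("samplename1_pos1.txt", np.column_stack((ramanshift, intensity)))

shift, counts = np.loadtxt("samplename1_pos1.txt", unpack=True)
```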
107 | ### Version
108 |
109 | The current version is v0.8.0
110 |
111 | ### Dependencies
112 |
113 | - python >= 3.11
114 | - lmfit >= 1.2.0
115 | - pandas >= 2.0.0
116 | - scipy >= 1.10.1
117 | - matplotlib >= 3.7.2
118 | - numpy >= 1.24.2
119 |
--------------------------------------------------------------------------------
/examples/fitting_Model_1st_6peaks+Si.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/examples/fitting_Model_1st_6peaks+Si.png
--------------------------------------------------------------------------------
/examples/raw_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/examples/raw_data.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling", "hatch-vcs"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "raman_fitting"
7 | license = {file = "LICENSE"}
8 | authors = [
9 | {name = "David Wallace", email = "mypydavid@proton.me"},
10 | ]
11 | description = "Python framework for the batch processing and deconvolution of raman spectra."
12 | readme = {file = "README.md", content-type = "text/markdown"}
13 | keywords = ["spectroscopy", "Raman", "fitting", "deconvolution", "batch processing", "carbonaceous materials"]
14 | classifiers = [
15 | "License :: OSI Approved :: MIT License",
16 | "Natural Language :: English",
17 | "Programming Language :: Python",
18 | "Programming Language :: Python :: 3 :: Only",
19 | "Programming Language :: Python :: 3.11",
20 | "Programming Language :: Python :: Implementation :: CPython",
21 | "Programming Language :: Python :: Implementation :: PyPy",
22 | "Intended Audience :: Science/Research",
23 | "Topic :: Scientific/Engineering :: Physics",
24 | "Topic :: Scientific/Engineering :: Chemistry",
25 | ]
26 | dynamic = ["version"]
27 | dependencies = [
28 | "pandas~=2.1.2",
29 | "scipy~=1.11.3",
30 | "lmfit~=1.2.2",
31 | "matplotlib~=3.8.0",
32 | "numpy~=1.26.1",
33 | "tablib~=3.5.0",
34 | "pydantic>=2.5",
35 | "pydantic-settings>=2.1",
36 | "pydantic_numpy>=4.1",
37 | "loguru>=0.7",
38 | "typer[all]",
39 | "mpire[dill]~=2.10.0",
40 | ]
41 |
42 | [project.optional-dependencies]
43 | pytest = [
44 | "pytest",
45 | "pytest-cov",
46 | "pytest-flake8",
47 | "mock",
48 | "wheel"
49 | ]
50 | dev = [
51 | "isort",
52 | "pylint",
53 | "flake8",
54 | "autopep8",
55 | "pydocstyle",
56 | "black",
57 | "bump2version",
58 | "raman_fitting[pytest]"
59 | ]
60 |
61 | [project.urls]
62 | homepage = "https://pypi.org/project/raman-fitting/"
63 | repository = "https://github.com/MyPyDavid/raman-fitting.git"
64 | # documentation = "https://raman-fitting.readthedocs.io/en/latest/"
65 |
66 | [project.scripts]
67 | raman_fitting = "raman_fitting.interfaces.typer_cli:app"
68 |
69 |
70 | [tool.hatch.version]
71 | source = "vcs"
72 |
73 | [tool.hatch.build.hooks.vcs]
74 | version-file = "src/raman_fitting/_version.py"
75 |
76 | [tool.hatch.build.targets.sdist]
77 | exclude = ["/profiling"]
78 |
79 | [tool.hatch.build.targets.wheel]
80 | only-include = ["src"]
81 | sources = ["src"]
82 |
83 | # testing
84 | [tool.pytest.ini_options]
85 | minversion = "7.0"
86 | pythonpath = "src"
87 | addopts = [
88 |     "--import-mode=importlib",
89 |     "-ra", "-q",
90 |     "--cov",
91 |     "--cov-report=term-missing", "--cov-report=html", "--cov-report=xml", "--cov-report=annotate",
92 | ]
93 | testpaths = [
94 | "tests",
95 | ]
96 |
97 | [tool.coverage.run]
98 | source = ["src"]
99 | omit = [
100 | "*/example_fixtures/*",
101 | ]
102 |
103 | [tool.coverage.report]
104 | # Regexes for lines to exclude from consideration
105 | exclude_also = [
106 | # Don't complain about missing debug-only code:
107 | "def __repr__",
108 | "if self\\.debug",
109 |
110 | # Don't complain if tests don't hit defensive assertion code:
111 | "raise AssertionError",
112 | "raise NotImplementedError",
113 |
114 | # Don't complain if non-runnable code isn't run:
115 | "if 0:",
116 | "if __name__ == .__main__.:",
117 |
118 | # Don't complain about abstract methods, they aren't run:
119 | "@(abc\\.)?abstractmethod",
120 | ]
121 |
122 | ignore_errors = true
123 |
124 | [tool.coverage.html]
125 | directory = "coverage_html_report"
126 |
127 |
128 | [tool.bumpversion]
129 | current_version = "0.8.0"
130 | commit = true
131 | commit_args = "--no-verify"
132 | message = "Bump version: {current_version} → {new_version}"
133 | tag = true
134 | allow_dirty = true
135 | tag_name = "{new_version}"
136 | tag_message = "Bump version: {current_version} → {new_version}"
137 | parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(\\.dev(?P<dev>\\d+))?"
138 | serialize =[
139 | "{major}.{minor}.{patch}.dev{dev}",
140 | "{major}.{minor}.{patch}"
141 | ]
142 |
143 | [[tool.bumpversion.files]]
144 | filename= "README.md"
145 | search = "The current version is v{current_version}"
146 | replace = "The current version is v{new_version}"
147 |
148 | [[tool.bumpversion.files]]
149 | filename= "pyproject.toml"
150 | search = 'current_version = "{current_version}"'
151 | replace = 'current_version = "{new_version}"'
152 |
--------------------------------------------------------------------------------
/src/raman_fitting/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "David Wallace"
2 | __docformat__ = "restructuredtext"
3 | __status__ = "Development"
4 | __future_package_name__ = "pyramdeconv"
5 | __current_package_name__ = "raman_fitting"
6 | __package_name__ = __current_package_name__
7 |
8 | import importlib.util
9 |
10 | try:
11 | from ._version import __version__
12 | except ImportError:
13 | # -- Source mode --
14 | try:
15 | # use setuptools_scm to get the current version from src using git
16 | from setuptools_scm import get_version as _gv
17 | from os import path as _path
18 |
19 | __version__ = _gv(_path.join(_path.dirname(__file__), _path.pardir))
20 | except ModuleNotFoundError:
21 | __version__ = "importerr_modulenotfound_version"
22 | except Exception:
23 | __version__ = "importerr_exception_version"
24 | except Exception:
25 | __version__ = "catch_exception_version"
26 |
27 | import sys
28 | import warnings
29 |
30 | from loguru import logger
31 |
32 | # This code is written for Python 3.11 and higher
33 | if sys.version_info < (3, 11):
34 | logger.error(f"{__package_name__} requires Python 3.11 or higher.")
35 | sys.exit(1)
36 |
37 | # Let users know if they're missing any hard dependencies
38 | hard_dependencies = ("numpy", "pandas", "scipy", "matplotlib", "lmfit", "pydantic")
39 | soft_dependencies = {}
40 | missing_dependencies = []
41 |
42 |
43 | for dependency in hard_dependencies:
44 | if not importlib.util.find_spec(dependency):
45 | missing_dependencies.append(dependency)
46 |
47 | if missing_dependencies:
48 | raise ImportError(f"Missing required dependencies {missing_dependencies}")
49 |
50 | for dependency in soft_dependencies:
51 | if not importlib.util.find_spec(dependency):
52 | warnings.warn(
53 | f"Missing important package {dependency}. {soft_dependencies[dependency]}"
54 | )
55 |
56 | del hard_dependencies, soft_dependencies, dependency, missing_dependencies
57 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/__init__.py:
--------------------------------------------------------------------------------
1 | from raman_fitting.config.base_settings import Settings
2 |
3 | settings = Settings()
--------------------------------------------------------------------------------
/src/raman_fitting/config/base_settings.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from pathlib import Path
3 |
4 | from pydantic import (
5 | Field,
6 | )
7 |
8 | from pydantic_settings import BaseSettings
9 |
10 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel
11 | from raman_fitting.models.deconvolution.base_model import (
12 | get_models_and_peaks_from_definitions,
13 | )
14 | from raman_fitting.models.deconvolution.spectrum_regions import (
15 | get_default_regions_from_toml_files,
16 | )
17 | from .default_models import load_config_from_toml_files
18 | from .path_settings import create_default_package_dir_or_ask, InternalPathSettings
19 | from types import MappingProxyType
20 |
21 |
22 | def get_default_models_and_peaks_from_definitions():
23 | models_and_peaks_definitions = load_config_from_toml_files()
24 | return get_models_and_peaks_from_definitions(models_and_peaks_definitions)
25 |
26 |
27 | class Settings(BaseSettings):
28 | default_models: Dict[str, Dict[str, BaseLMFitModel]] = Field(
29 | default_factory=get_default_models_and_peaks_from_definitions,
30 | alias="my_default_models",
31 | init_var=False,
32 | validate_default=False,
33 | )
34 | default_regions: Dict[str, Dict[str, float]] | None = Field(
35 | default_factory=get_default_regions_from_toml_files,
36 | alias="my_default_regions",
37 | init_var=False,
38 | validate_default=False,
39 | )
40 | default_definitions: MappingProxyType | None = Field(
41 | default_factory=load_config_from_toml_files,
42 | alias="my_default_definitions",
43 | init_var=False,
44 | validate_default=False,
45 | )
46 |
47 | destination_dir: Path = Field(default_factory=create_default_package_dir_or_ask)
48 | internal_paths: InternalPathSettings = Field(default_factory=InternalPathSettings)
49 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/default_models/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from types import MappingProxyType
3 | import tomllib
4 |
5 |
6 | def load_config_from_toml_files() -> MappingProxyType:
7 | current_parent_dir = Path(__file__).resolve().parent
8 | default_peak_settings = {}
9 | for i in current_parent_dir.glob("*.toml"):
10 | default_peak_settings.update(tomllib.loads(i.read_bytes().decode()))
11 | if not default_peak_settings:
12 | raise ValueError("default models should not be empty.")
13 |
14 | return MappingProxyType(default_peak_settings)
15 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/default_models/first_order.toml:
--------------------------------------------------------------------------------
1 | [first_order]
2 |
3 | [first_order.models]
4 | 1peak = "G"
5 | 2peaks = "G+D"
6 | 3peaks = "G+D+D3"
7 | 4peaks = "G+D+D3+D4"
8 | 5peaks = "G+D+D2+D3+D4"
9 | 6peaks = "G+D+D2+D3+D4+D5"
10 |
11 | [first_order.peaks]
12 |
13 | [first_order.peaks.G]
14 | docstring = """
15 | Graphite belongs to the P63/mmc (D6h^4) space group. If considering only a graphene plane, at
16 | the Γ point of the Brillouin zone, there are six normal modes that possess only one mode (doubly
17 | degenerate in plane) with a E2g representation, which is Raman active.
18 | G ; Ideal graphitic lattice (E2g-symmetry)
19 | G peak center stable over different laser wavelengths.
20 | Influenced by potential, HSO4 adsorption (or ionization of G- and G+),
21 | magnetic fields, pressure
22 | For G: 1580-1590; D5 and D2 I do not know.
23 | """
24 | peak_name = "G"
25 | peak_type = "Lorentzian"
26 | [first_order.peaks.G.param_hints]
27 | center = {value = 1571, min = 1545, max = 1595}
28 | sigma = {value = 30, min = 5, max = 150}
29 | amplitude = {value = 100, min = 1e-05, max = 500}
30 |
31 |
32 | [first_order.peaks.D]
33 | docstring = """
34 | D or D1 ; Disordered graphitic lattice (graphene layer edges, A1g symmetry)
35 | A defective graphite presents other bands that can be as intense as the G band at D=1350 and D'=1615 cm-1
36 | These bands are activated by defects due to the breaking of the crystal symmetry that relax the Raman selection rules.
37 | For D1: 1340-1350
38 | """
39 | peak_name = "D"
40 | peak_type = "Lorentzian"
41 | [first_order.peaks.D.param_hints]
42 | center = {value = 1350, min = 1330, max = 1380}
43 | sigma = {value = 35, min = 1, max = 150}
44 | amplitude = {value = 120, min = 1e-05, max = 500}
45 |
46 | [first_order.peaks.D2]
47 | docstring = """
48 | D2 or D' ; Right next to the G peak, sometimes not obvious as G peak split.
49 | Disordered graphitic lattice (surface graphene layers, E2g-symmetry)
50 | j.molstruc.2010.12.065
51 | """
52 | peak_name = "D2"
53 | peak_type = "Lorentzian"
54 | [first_order.peaks.D2.param_hints]
55 | center = {value = 1606, min = 1592, max = 1635}
56 | sigma = {value = 30, min = 5, max = 150}
57 | amplitude = {value = 35, min = 5, max = 500}
58 |
59 |
60 | [first_order.peaks.D3]
61 | docstring = """
62 | D3 or D'' or A or Am ; Between the D and G peak, sometimes too broad.
63 | For amorphous carbon (Gaussian [26] or Lorentzian [3,18,27] line shape).
64 | For D3: 1495-1515
65 | """
66 | peak_name = "D3"
67 | peak_type = "Lorentzian"
68 | [first_order.peaks.D3.param_hints]
69 | center = {value = 1480, min = 1450, max = 1525}
70 | sigma = {value = 25, min = 1, max = 150}
71 | amplitude = {value = 25, min = 1e-02, max = 500}
72 |
73 | [first_order.peaks.D4]
74 | docstring = """
75 | D4 or I ; Below D band, a shoulder sometimes split with D5 band.
76 | Disordered graphitic lattice (A1g symmetry) [10], polyenes [3,27], ionic impurities
77 | D4 peak at 1212 cm−1
78 | Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7
79 | For D4: 1185-1210, but depends on whether D5 is present or not.
80 | """
81 | peak_name = "D4"
82 | peak_type = "Lorentzian"
83 | [first_order.peaks.D4.param_hints]
84 | center = {value = 1230, min = 1180, max = 1310}
85 | sigma = {value = 40, min = 1, max = 150}
86 | amplitude = {value = 20, min = 1e-02, max = 200}
87 |
88 | [first_order.peaks.D5]
89 | docstring = """
90 | D5 peak at 1110 cm−1. At the lowest shoulder of the D peak, below D4.
91 | Ref: Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7
92 | """
93 | peak_name = "D5"
94 | peak_type = "Lorentzian"
95 | [first_order.peaks.D5.param_hints]
96 | center = {value = 1110, min = 1080, max = 1150}
97 | sigma = {value = 40, min = 1, max = 150}
98 | amplitude = {value = 20, min = 1e-02, max = 200}
99 |
100 | [first_order.peaks.Si1]
101 | docstring = """
102 | ===== Extra peak at ca. 960 cm-1 presumably from Si substrate 2nd order === not from Nafion...
103 | => Either cut the Spectra 1000-2000
104 | => Place an extra Gaussian peak at 960 in the fit
105 | """
106 | peak_name = "Si1"
107 | peak_type = "Gaussian"
108 | is_substrate = true
109 | [first_order.peaks.Si1.param_hints]
110 | center = {value = 960, min = 900, max = 980}
111 | sigma = {value = 10, min = 0, max = 150}
112 | amplitude = {value = 10, min = 0, max = 200}
113 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/default_models/normalization.toml:
--------------------------------------------------------------------------------
1 | [normalization]
2 |
3 | [normalization.models]
4 | norm = "norm_G+norm_D"
5 |
6 | [normalization.peaks]
7 |
8 | [normalization.peaks.norm_G]
9 | docstring = """
10 | G_peak used for normalization
11 | """
12 | peak_name = "norm_G"
13 | peak_type = "Lorentzian"
14 | is_for_normalization = true
15 | [normalization.peaks.norm_G.param_hints]
16 | center = {"value" = 1581, "min" = 1500, "max" = 1600}
17 | sigma = {"value" = 40, "min" = 1e-05, "max" = 1e3}
18 | amplitude = {"value" = 8e4, "min" = 1e2}
19 |
20 | [normalization.peaks.norm_D]
21 | docstring = """
22 | D_peak for normalization
23 | """
24 | peak_name = "norm_D"
25 | peak_type = "Lorentzian"
26 | is_for_normalization = true
27 | [normalization.peaks.norm_D.param_hints]
28 | center = {"value" = 1350, "min" = 1300, "max" = 1400}
29 | sigma = {"value" = 90, "min" = 1e-05}
30 | amplitude = {"value" = 10e5, "min" = 1e2}
31 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/default_models/second_order.toml:
--------------------------------------------------------------------------------
1 | [second_order]
2 |
3 | [second_order.models]
4 | 2nd_4peaks = "D4D4+D1D1+GD1+D2D2"
5 |
6 | [second_order.peaks]
7 |
8 | [second_order.peaks.D4D4]
9 | peak_name = "D4D4"
10 | peak_type = "Lorentzian"
11 | [second_order.peaks.D4D4.param_hints]
12 | center = {value = 2435, min = 2400, max = 2550}
13 | sigma = {value = 30, min = 1, max = 200}
14 | amplitude = {value = 2, min = 1e-03, max = 100}
15 |
16 | [second_order.peaks.D1D1]
17 | peak_name = "D1D1"
18 | peak_type = "Lorentzian"
19 | [second_order.peaks.D1D1.param_hints]
20 | center = {value = 2650, min = 2600, max = 2750}
21 | sigma = {value = 60, min = 1, max = 200}
22 | amplitude = {value = 14, min = 1e-03, max = 100}
23 |
24 | [second_order.peaks.GD1]
25 | peak_name = "GD1"
26 | peak_type = "Lorentzian"
27 | [second_order.peaks.GD1.param_hints]
28 | center = {value = 2900, min = 2800, max = 2950}
29 | sigma = {value = 50, min = 1, max = 200}
30 | amplitude = {value = 10, min = 1e-03, max = 100}
31 |
32 | [second_order.peaks.D2D2]
33 | peak_type = "Lorentzian"
34 | peak_name = "D2D2"
35 | [second_order.peaks.D2D2.param_hints]
36 | center = {value = 3250, min = 3000, max = 3400}
37 | sigma = {value = 60, min = 20, max = 200}
38 | amplitude = {value = 1, min = 1e-03, max = 100}
39 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/default_models/spectrum_regions.toml:
--------------------------------------------------------------------------------
1 | [spectrum]
2 |
3 | [spectrum.regions]
4 | full = {"min" = 200, "max" = 3600}
5 | full_first_and_second = {"min" = 800, "max" = 3500}
6 | low = {"min" = 150, "max" = 850, "extra_margin" = 10}
7 | first_order = {"min" = 900, "max" = 2000}
8 | mid = {"min" = 1850, "max" = 2150, "extra_margin" = 10}
9 | normalization = {"min" = 1500, "max" = 1675, "extra_margin" = 10}
10 | second_order = {"min" = 2150, "max" = 3380}
--------------------------------------------------------------------------------
/src/raman_fitting/config/filepath_helper.py:
--------------------------------------------------------------------------------
1 | """ this module prepares the local file paths for data and results"""
2 |
3 |
4 | from pathlib import Path
5 |
6 | from loguru import logger
7 |
8 |
9 | def check_and_make_dirs(destdir: Path) -> None:
10 | _destfile = None
11 | if destdir.suffix:
12 | _destfile = destdir
13 | destdir = _destfile.parent
14 |
15 | if not destdir.is_dir():
16 | destdir.mkdir(exist_ok=True, parents=True)
17 | logger.info(
18 | f"check_and_make_dirs the results directory did not exist and was created at:\n{destdir}\n"
19 | )
20 |
21 | if _destfile:
22 | _destfile.touch()
23 |
24 |
25 | def create_dir_or_ask_user_input(destdir: Path, ask_user=True):
26 |     counter, max_attempts = 0, 10
27 |     while not destdir.exists() and counter < max_attempts:
28 |         counter += 1
29 |         answer = "y"
30 |         if ask_user:
31 |             answer = input(
32 |                 f"Directory for raman_fitting to store its files:\n{destdir}\nCan this folder be created? (y/n) "
33 |             )
34 |         if "y" in answer.lower():
35 |             destdir.mkdir(exist_ok=True, parents=True)
36 |         else:
37 |             # ask for an alternative directory and check it in the next iteration
38 |             new_path_user = input(
39 |                 "Please provide a directory for raman_fitting to store its files: "
40 |             )
41 |             try:
42 |                 destdir = Path(new_path_user).resolve()
43 |             except Exception as e:
44 |                 print(f"Exception: {e}")
45 |                 continue
46 | 
47 |     logger.info(f"Directory in use: {destdir}")
48 |     return destdir
49 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/logging_config.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 |
4 | # Multiple calls to logging.getLogger('someLogger') return a
5 | # reference to the same logger object. This is true not only
6 | # within the same module, but also across modules as long as
7 | # it is in the same Python interpreter process.
8 |
9 | FORMATTER = logging.Formatter(
10 | "%(asctime)s — %(name)s — %(levelname)s —%(funcName)s:%(lineno)d — %(message)s"
11 | )
12 |
13 |
14 | log_format = (
15 | "[%(asctime)s] — %(name)s — %(levelname)s —"
16 |     "%(funcName)s:%(lineno)d — %(message)s"
17 | )
18 | # '[%(asctime)s] %(levelname)-8s %(name)-12s %(message)s')
19 |
20 | # Define basic configuration
21 | logging.basicConfig(
22 | # Define logging level
23 | level=logging.DEBUG,
24 | # Define the format of log messages
25 | format=log_format,
26 | # Provide the filename to store the log messages
27 | filename=("debug.log"),
28 | )
29 |
30 |
31 | def get_console_handler():
32 | console_handler = logging.StreamHandler(sys.stdout)
33 | console_handler.setFormatter(FORMATTER)
34 | return console_handler
35 |
--------------------------------------------------------------------------------
/src/raman_fitting/config/path_settings.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import tempfile
3 | from enum import StrEnum, auto
4 |
5 |
6 | from pydantic import (
7 | BaseModel,
8 | DirectoryPath,
9 | FilePath,
10 | ConfigDict,
11 | Field,
12 | model_validator,
13 | )
14 |
15 |
16 | from .filepath_helper import check_and_make_dirs
17 |
18 |
19 | PACKAGE_NAME = "raman_fitting"
20 | CURRENT_FILE: Path = Path(__file__).resolve()
21 | PACKAGE_ROOT: Path = CURRENT_FILE.parent.parent
22 | REPO_ROOT: Path = PACKAGE_ROOT.parent
23 | INTERNAL_DEFAULT_MODELS: Path = CURRENT_FILE.parent / "default_models"
24 | # MODEL_DIR: Path = PACKAGE_ROOT / "deconvolution_models"
25 | INTERNAL_EXAMPLE_FIXTURES: Path = PACKAGE_ROOT / "example_fixtures"
26 | INTERNAL_PYTEST_FIXTURES: Path = REPO_ROOT / "tests" / "test_fixtures"
27 |
28 | # Home dir from pathlib.Path for storing the results
29 | USER_HOME_PACKAGE: Path = Path.home() / PACKAGE_NAME
30 | # pyramdeconv is the new version package name
31 |
32 | # Optional local configuration file
33 | USER_LOCAL_CONFIG_FILE: Path = USER_HOME_PACKAGE / f"{PACKAGE_NAME}.toml"
34 |
35 | INDEX_FILE_NAME = f"{PACKAGE_NAME}_index.csv"
36 | # Storage file of the index
37 | USER_INDEX_FILE_PATH: Path = USER_HOME_PACKAGE / INDEX_FILE_NAME
38 |
39 | TEMP_DIR = Path(tempfile.mkdtemp(prefix="raman-fitting-"))
40 | TEMP_RESULTS_DIR: Path = TEMP_DIR / "results"
41 |
42 | CLEAN_SPEC_REGION_NAME_PREFIX = "savgol_filter_raw_region_"
43 |
44 | ERROR_MSG_TEMPLATE = "{sample_group} {sampleid}: {msg}"
45 |
46 |
47 | class InternalPathSettings(BaseModel):
48 | settings_file: FilePath = Field(CURRENT_FILE)
49 | package_root: DirectoryPath = Field(PACKAGE_ROOT)
50 | default_models_dir: DirectoryPath = Field(INTERNAL_DEFAULT_MODELS)
51 | example_fixtures: DirectoryPath = Field(INTERNAL_EXAMPLE_FIXTURES)
52 | pytest_fixtures: DirectoryPath = Field(INTERNAL_PYTEST_FIXTURES)
53 | temp_dir: DirectoryPath = Field(TEMP_RESULTS_DIR)
54 | temp_index_file: FilePath = Field(TEMP_DIR / INDEX_FILE_NAME)
55 |
56 |
57 | EXPORT_FOLDER_NAMES = {
58 | "plots": "fitting_plots",
59 | "components": "fitting_components",
60 | "raw_data": "raw_data",
61 | }
62 |
63 |
64 | class RunModes(StrEnum):
65 | NORMAL = auto()
66 | PYTEST = auto()
67 | EXAMPLES = auto()
68 | DEBUG = auto()
69 |
70 |
71 | def get_run_mode_paths(run_mode: RunModes, user_package_home: Path = None):
72 | if user_package_home is None:
73 | user_package_home = USER_HOME_PACKAGE
74 | if isinstance(run_mode, str):
75 | run_mode = RunModes(run_mode)
76 |
77 | RUN_MODE_PATHS = {
78 | RunModes.PYTEST.name: {
79 | "RESULTS_DIR": TEMP_RESULTS_DIR,
80 | "DATASET_DIR": INTERNAL_EXAMPLE_FIXTURES,
81 | "USER_CONFIG_FILE": INTERNAL_EXAMPLE_FIXTURES / f"{PACKAGE_NAME}.toml",
82 | "INDEX_FILE": TEMP_RESULTS_DIR / f"{PACKAGE_NAME}_index.csv",
83 | },
84 | RunModes.EXAMPLES.name: {
85 | "RESULTS_DIR": user_package_home / "examples",
86 | "DATASET_DIR": INTERNAL_EXAMPLE_FIXTURES,
87 | "USER_CONFIG_FILE": INTERNAL_EXAMPLE_FIXTURES / f"{PACKAGE_NAME}.toml",
88 | "INDEX_FILE": user_package_home / "examples" / f"{PACKAGE_NAME}_index.csv",
89 | },
90 | RunModes.NORMAL.name: {
91 | "RESULTS_DIR": user_package_home / "results",
92 | "DATASET_DIR": user_package_home / "datafiles",
93 | "USER_CONFIG_FILE": user_package_home / "raman_fitting.toml",
94 | "INDEX_FILE": user_package_home / f"{PACKAGE_NAME}_index.csv",
95 | },
96 | }
97 | if run_mode.name not in RUN_MODE_PATHS:
98 | raise ValueError(f"Choice of run_mode {run_mode.name} not supported.")
99 | return RUN_MODE_PATHS[run_mode.name]
100 |
101 |
102 | class ExportPathSettings(BaseModel):
103 | results_dir: Path
104 | plots: DirectoryPath = Field(None, validate_default=False)
105 | components: DirectoryPath = Field(None, validate_default=False)
106 | raw_data: DirectoryPath = Field(None, validate_default=False)
107 |
108 | @model_validator(mode="after")
109 | def set_export_path_settings(self) -> "ExportPathSettings":
110 | if not self.results_dir.is_dir():
111 | self.results_dir.mkdir(exist_ok=True, parents=True)
112 |
113 | plots: DirectoryPath = self.results_dir.joinpath(EXPORT_FOLDER_NAMES["plots"])
114 | self.plots = plots
115 | components: DirectoryPath = self.results_dir.joinpath(
116 | EXPORT_FOLDER_NAMES["components"]
117 | )
118 | self.components = components
119 | raw_data: DirectoryPath = self.results_dir.joinpath(
120 | EXPORT_FOLDER_NAMES["raw_data"]
121 | )
122 | self.raw_data = raw_data
123 | return self
124 |
125 |
126 | class RunModePaths(BaseModel):
127 | model_config = ConfigDict(alias_generator=str.upper)
128 |
129 | run_mode: RunModes
130 | results_dir: DirectoryPath
131 | dataset_dir: DirectoryPath
132 | user_config_file: Path
133 | index_file: Path
134 |
135 |
136 | def initialize_run_mode_paths(
137 | run_mode: RunModes, user_package_home: Path = None
138 | ) -> RunModePaths:
139 | run_mode_paths = get_run_mode_paths(run_mode, user_package_home=user_package_home)
140 |
141 | for destname, destdir in run_mode_paths.items():
142 | destdir = Path(destdir)
143 | check_and_make_dirs(destdir)
144 | return RunModePaths(RUN_MODE=run_mode, **run_mode_paths)
145 |
146 |
147 | def create_default_package_dir_or_ask():
148 | return USER_HOME_PACKAGE
149 |
--------------------------------------------------------------------------------
/src/raman_fitting/delegating/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/delegating/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/delegating/main_delegator.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402
2 | from dataclasses import dataclass, field
3 | from typing import Dict, List, Sequence, Any
4 |
5 | from raman_fitting.config.path_settings import (
6 | RunModes,
7 | ERROR_MSG_TEMPLATE,
8 | initialize_run_mode_paths,
9 | )
10 | from raman_fitting.config import settings
11 |
12 | from raman_fitting.imports.models import RamanFileInfo
13 |
14 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel
15 | from raman_fitting.models.splitter import RegionNames
16 | from raman_fitting.exports.exporter import ExportManager
17 | from raman_fitting.imports.files.file_indexer import (
18 | RamanFileIndex,
19 | groupby_sample_group,
20 | groupby_sample_id,
21 | IndexSelector,
22 | initialize_index_from_source_files,
23 | )
24 |
25 | from raman_fitting.delegating.models import (
26 | AggregatedSampleSpectrumFitResult,
27 | )
28 | from raman_fitting.delegating.pre_processing import (
29 | prepare_aggregated_spectrum_from_files,
30 | )
31 | from raman_fitting.types import LMFitModelCollection
32 | from raman_fitting.delegating.run_fit_spectrum import run_fit_over_selected_models
33 |
34 |
35 | from loguru import logger
36 |
37 |
38 | @dataclass
39 | class MainDelegator:
40 | # IDEA Add flexible input handling for the cli, such a path to dir, or list of files
41 | # or create index when no kwargs are given.
42 | """
43 | Main delegator for the processing of files containing Raman spectra.
44 |
45 | Creates plots and files in the config RESULTS directory.
46 | """
47 |
48 | run_mode: RunModes
49 | use_multiprocessing: bool = False
50 | lmfit_models: LMFitModelCollection = field(
51 | default_factory=lambda: settings.default_models
52 | )
53 | fit_model_region_names: Sequence[RegionNames] = field(
54 | default=(RegionNames.first_order, RegionNames.second_order)
55 | )
56 | fit_model_specific_names: Sequence[str] | None = None
57 | sample_ids: Sequence[str] = field(default_factory=list)
58 | sample_groups: Sequence[str] = field(default_factory=list)
59 | index: RamanFileIndex = None
60 | selection: Sequence[RamanFileInfo] = field(init=False)
61 | selected_models: Sequence[RamanFileInfo] = field(init=False)
62 |
63 | results: Dict[str, Any] | None = field(default=None, init=False)
64 | export: bool = True
65 |
66 | def __post_init__(self):
67 | run_mode_paths = initialize_run_mode_paths(self.run_mode)
68 | if self.index is None:
69 | raman_files = run_mode_paths.dataset_dir.glob("*.txt")
70 | index_file = run_mode_paths.index_file
71 | self.index = initialize_index_from_source_files(
72 | files=raman_files, index_file=index_file, force_reindex=True
73 | )
74 |
75 | self.selection = self.select_samples_from_index()
76 | self.selected_models = self.select_models_from_provided_models()
77 | self.main_run()
78 | if self.export:
79 | self.exports = self.call_export_manager()
80 |
81 | def select_samples_from_index(self) -> Sequence[RamanFileInfo]:
82 | index = self.index
83 | # breakpoint()
84 | index_selector = IndexSelector(
85 | **dict(
86 | raman_files=index.raman_files,
87 | sample_groups=self.sample_groups,
88 | sample_ids=self.sample_ids,
89 | )
90 | )
91 | selection = index_selector.selection
92 | if not selection:
93 | logger.info("Selection was empty.")
94 | return selection
95 |
96 | def call_export_manager(self):
97 | # breakpoint()
98 | export = ExportManager(self.run_mode, self.results)
99 | exports = export.export_files()
100 | return exports
101 |
102 | # region_names:List[RegionNames], model_names: List[str]
103 | def select_models_from_provided_models(self) -> LMFitModelCollection:
104 | selected_region_names = self.fit_model_region_names
105 | selected_model_names = self.fit_model_specific_names
106 | selected_models = {}
107 | for region_name, all_region_models in self.lmfit_models.items():
108 | if region_name not in selected_region_names:
109 | continue
110 | if not selected_model_names:
111 | selected_models[region_name] = all_region_models
112 | continue
113 | selected_region_models = {}
114 | for mod_name, mod_val in all_region_models.items():
115 | if mod_name not in selected_model_names:
116 | continue
117 | selected_region_models[mod_name] = mod_val
118 |
119 | selected_models[region_name] = selected_region_models
120 | return selected_models
121 |
122 | def select_fitting_model(
123 | self, region_name: RegionNames, model_name: str
124 | ) -> BaseLMFitModel:
125 | try:
126 | return self.lmfit_models[region_name][model_name]
127 | except KeyError as exc:
128 | raise KeyError(f"Model {region_name} {model_name} not found.") from exc
129 |
130 | def main_run(self):
131 | selection = self.select_samples_from_index()
132 | if not self.fit_model_region_names:
133 | logger.info("No model region names were selected.")
134 | if not self.selected_models:
135 | logger.info("No fit models were selected.")
136 |
137 | results = {}
138 |
139 | for group_name, grp in groupby_sample_group(selection):
140 | results[group_name] = {}
141 | for sample_id, sample_grp in groupby_sample_id(grp):
142 | sgrp = list(sample_grp)
143 | results[group_name][sample_id] = {}
144 | _error_msg = None
145 |
146 | if not sgrp:
147 | _err = "group is empty"
148 | _error_msg = ERROR_MSG_TEMPLATE.format(group_name, sample_id, _err)
149 | logger.debug(_error_msg)
150 | results[group_name][sample_id]["errors"] = _error_msg
151 | continue
152 |
153 | unique_positions = {i.sample.position for i in sgrp}
154 |                 if len(unique_positions) < len(sgrp):
155 |                     # edge-case: multiple source files share a single position on a sample
156 |                     _error_msg = f"Multiple source files for a single position on a sample, {group_name} {sample_id}"
157 | results[group_name][sample_id]["errors"] = _error_msg
158 | logger.debug(_error_msg)
159 | model_result = run_fit_over_selected_models(
160 | sgrp,
161 | self.selected_models,
162 | use_multiprocessing=self.use_multiprocessing,
163 | )
164 | results[group_name][sample_id]["fit_results"] = model_result
165 | self.results = results
166 |
167 |
168 | def get_results_over_selected_models(
169 | raman_files: List[RamanFileInfo], models: LMFitModelCollection, fit_model_results
170 | ) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]:
171 | results = {}
172 | for region_name, region_grp in models.items():
173 | aggregated_spectrum = prepare_aggregated_spectrum_from_files(
174 | region_name, raman_files
175 | )
176 | if aggregated_spectrum is None:
177 | continue
178 | fit_region_results = AggregatedSampleSpectrumFitResult(
179 | region_name=region_name,
180 | aggregated_spectrum=aggregated_spectrum,
181 | fit_model_results=fit_model_results,
182 | )
183 | results[region_name] = fit_region_results
184 | return results
185 |
186 |
187 | def make_examples():
188 | # breakpoint()
189 | _main_run = MainDelegator(
190 | run_mode="pytest", fit_model_specific_names=["2peaks", "3peaks", "2nd_4peaks"]
191 | )
192 | _main_run.main_run()
193 | return _main_run
194 |
195 |
196 | if __name__ == "__main__":
197 | example_run = make_examples()
198 |
--------------------------------------------------------------------------------
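A short sketch of driving the delegator above, assuming the EXAMPLES run mode and the bundled example fixtures; the specific model names mirror the ones used in make_examples().

    from raman_fitting.config.path_settings import RunModes
    from raman_fitting.delegating.main_delegator import MainDelegator

    # __post_init__ builds the index, runs the fits and (optionally) exports the plots
    delegator = MainDelegator(
        run_mode=RunModes.EXAMPLES,
        fit_model_specific_names=["2peaks", "3peaks", "2nd_4peaks"],
    )
    for group_name, group_results in (delegator.results or {}).items():
        print(group_name, list(group_results))
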
/src/raman_fitting/delegating/models.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402
2 | from typing import Dict, Sequence
3 |
4 | from pydantic import BaseModel
5 |
6 | from raman_fitting.imports.models import RamanFileInfo
7 |
8 | from raman_fitting.models.spectrum import SpectrumData
9 | from raman_fitting.models.fit_models import SpectrumFitModel
10 | from raman_fitting.models.splitter import RegionNames
11 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader
12 | from raman_fitting.processing.post_processing import SpectrumProcessor
13 |
14 |
15 | class PreparedSampleSpectrum(BaseModel):
16 | file_info: RamanFileInfo
17 | read: SpectrumReader
18 | processed: SpectrumProcessor
19 |
20 |
21 | class AggregatedSampleSpectrum(BaseModel):
22 | sources: Sequence[PreparedSampleSpectrum]
23 | spectrum: SpectrumData
24 |
25 |
26 | class AggregatedSampleSpectrumFitResult(BaseModel):
27 | region_name: RegionNames
28 | aggregated_spectrum: AggregatedSampleSpectrum
29 | fit_model_results: Dict[str, SpectrumFitModel]
30 |
--------------------------------------------------------------------------------
/src/raman_fitting/delegating/pre_processing.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from raman_fitting.models.splitter import RegionNames
4 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader
5 | from raman_fitting.processing.post_processing import SpectrumProcessor
6 | from raman_fitting.imports.models import RamanFileInfo
7 | from .models import (
8 | AggregatedSampleSpectrum,
9 | PreparedSampleSpectrum,
10 | )
11 |
12 | from loguru import logger
13 |
14 | from raman_fitting.config.path_settings import CLEAN_SPEC_REGION_NAME_PREFIX
15 | from ..imports.spectrum.spectra_collection import SpectraDataCollection
16 |
17 |
18 | def prepare_aggregated_spectrum_from_files(
19 | region_name: RegionNames, raman_files: List[RamanFileInfo]
20 | ) -> AggregatedSampleSpectrum | None:
21 | select_region_key = f"{CLEAN_SPEC_REGION_NAME_PREFIX}{region_name}"
22 | clean_data_for_region = []
23 | data_sources = []
24 | for i in raman_files:
25 | read = SpectrumReader(i.file)
26 | processed = SpectrumProcessor(read.spectrum)
27 | prepared_spec = PreparedSampleSpectrum(
28 | file_info=i, read=read, processed=processed
29 | )
30 | data_sources.append(prepared_spec)
31 | selected_clean_data = processed.clean_spectrum.spec_regions[select_region_key]
32 | clean_data_for_region.append(selected_clean_data)
33 | if not clean_data_for_region:
34 | logger.warning(
35 |             f"prepare_aggregated_spectrum_from_files received no files for region {region_name}."
36 | )
37 | return
38 | spectra_collection = SpectraDataCollection(
39 | spectra=clean_data_for_region, region_name=region_name
40 | )
41 | aggregated_spectrum = AggregatedSampleSpectrum(
42 | sources=data_sources, spectrum=spectra_collection.mean_spectrum
43 | )
44 | return aggregated_spectrum
45 |
--------------------------------------------------------------------------------
/src/raman_fitting/delegating/run_fit_multi.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 |
3 | from loguru import logger
4 | from mpire import WorkerPool
5 |
6 | from raman_fitting.models.fit_models import SpectrumFitModel
7 |
8 |
9 | def run_fit_multi(**kwargs) -> SpectrumFitModel:
10 | # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults
11 | spectrum = kwargs.pop("spectrum")
12 | model = kwargs.pop("model")
13 | lmfit_model = model["lmfit_model"]
14 | region = kwargs.pop("region")
15 | import time
16 |
17 | lmfit_kwargs = {}
18 | if "method" not in kwargs:
19 | lmfit_kwargs["method"] = "leastsq"
20 |
21 | init_params = lmfit_model.make_params()
22 | start_time = time.time()
23 | x, y = spectrum["ramanshift"], spectrum["intensity"]
24 | out = lmfit_model.fit(y, init_params, x=x, **lmfit_kwargs) # 'leastsq'
25 | end_time = time.time()
26 |     # elapsed wall-clock time in seconds
27 |     elapsed_time = end_time - start_time
28 | logger.debug(
29 | f"Fit with model {model['name']} on {region} success: {out.success} in {elapsed_time:.2f}s."
30 | )
31 | return out
32 |
33 |
34 | def run_fit_multiprocessing(
35 | spec_fits: List[SpectrumFitModel],
36 | ) -> Dict[str, SpectrumFitModel]:
37 | spec_fits_dumps = [i.model_dump() for i in spec_fits]
38 |
39 | with WorkerPool(n_jobs=4, use_dill=True) as pool:
40 | results = pool.map(
41 | run_fit_multi, spec_fits_dumps, progress_bar=True, progress_bar_style="rich"
42 | )
43 | # patch spec_fits, setattr fit_result
44 | fit_model_results = {}
45 | for result in results:
46 | _spec_fit_search = [
47 | i for i in spec_fits if i.model.lmfit_model.name == result.model.name
48 | ]
49 | if len(_spec_fit_search) != 1:
50 | continue
51 | _spec_fit = _spec_fit_search[0]
52 | _spec_fit.fit_result = result
53 | fit_model_results[_spec_fit.model.name] = _spec_fit
54 | return fit_model_results
55 |
--------------------------------------------------------------------------------
/src/raman_fitting/delegating/run_fit_spectrum.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 |
3 | from raman_fitting.delegating.run_fit_multi import run_fit_multiprocessing
4 | from raman_fitting.models.spectrum import SpectrumData
5 | from raman_fitting.types import LMFitModelCollection
6 | from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult
7 | from raman_fitting.delegating.pre_processing import (
8 | prepare_aggregated_spectrum_from_files,
9 | )
10 | from raman_fitting.imports.models import RamanFileInfo
11 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames
12 | from raman_fitting.models.fit_models import SpectrumFitModel
13 |
14 | from loguru import logger
15 |
16 |
17 | def run_fit_over_selected_models(
18 | raman_files: List[RamanFileInfo],
19 | models: LMFitModelCollection,
20 | use_multiprocessing: bool = False,
21 | ) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]:
22 | results = {}
23 | for region_name, model_region_grp in models.items():
24 | aggregated_spectrum = prepare_aggregated_spectrum_from_files(
25 | region_name, raman_files
26 | )
27 | if aggregated_spectrum is None:
28 | continue
29 | spec_fits = prepare_spec_fit_regions(
30 | aggregated_spectrum.spectrum, model_region_grp
31 | )
32 | if use_multiprocessing:
33 | fit_model_results = run_fit_multiprocessing(spec_fits)
34 | else:
35 | fit_model_results = run_fit_loop(spec_fits)
36 | fit_region_results = AggregatedSampleSpectrumFitResult(
37 | region_name=region_name,
38 | aggregated_spectrum=aggregated_spectrum,
39 | fit_model_results=fit_model_results,
40 | )
41 | results[region_name] = fit_region_results
42 | return results
43 |
44 |
45 | def prepare_spec_fit_regions(
46 | spectrum: SpectrumData, model_region_grp
47 | ) -> List[SpectrumFitModel]:
48 | spec_fits = []
49 | for model_name, model in model_region_grp.items():
50 | region = model.region_name.name
51 | spec_fit = SpectrumFitModel(spectrum=spectrum, model=model, region=region)
52 | spec_fits.append(spec_fit)
53 | return spec_fits
54 |
55 |
56 | def run_fit_loop(spec_fits: List[SpectrumFitModel]) -> Dict[str, SpectrumFitModel]:
57 | fit_model_results = {}
58 | for spec_fit in spec_fits:
59 | # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults
60 | spec_fit.run_fit()
61 | logger.debug(
62 | f"Fit with model {spec_fit.model.name} on {spec_fit.region} success: {spec_fit.fit_result.success} in {spec_fit.elapsed_time:.2f}s."
63 | )
64 | fit_model_results[spec_fit.model.name] = spec_fit
65 | return fit_model_results
66 |
--------------------------------------------------------------------------------
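A sketch of calling the fit loop above directly, outside of MainDelegator. The "datafiles" directory is a hypothetical location of .txt spectra; settings.default_models provides the region-to-model mapping, as in the MainDelegator defaults.

    from pathlib import Path

    from raman_fitting.config import settings
    from raman_fitting.delegating.run_fit_spectrum import run_fit_over_selected_models
    from raman_fitting.imports.models import RamanFileInfo

    raman_files = [RamanFileInfo(file=p) for p in Path("datafiles").glob("*.txt")]
    results = run_fit_over_selected_models(raman_files, settings.default_models)
    for region_name, region_result in results.items():
        print(region_name, list(region_result.fit_model_results))
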
/src/raman_fitting/example_fixtures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/example_fixtures/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/exports/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/src/raman_fitting/exports/exporter.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Dict, Any
3 | from raman_fitting.config.path_settings import (
4 | RunModes,
5 | initialize_run_mode_paths,
6 | ExportPathSettings,
7 | )
8 | from raman_fitting.config import settings
9 |
10 | from raman_fitting.exports.plotting_fit_results import fit_spectrum_plot
11 | from raman_fitting.exports.plotting_raw_data import raw_data_spectra_plot
12 |
13 |
14 | from loguru import logger
15 |
16 |
17 | class ExporterError(Exception):
18 |     """Error occurred during the exporting functions"""
19 |
20 |
21 | @dataclass
22 | class ExportManager:
23 | run_mode: RunModes
24 | results: Dict[str, Any] | None = None
25 |
26 | def __post_init__(self):
27 | self.paths = initialize_run_mode_paths(
28 | self.run_mode, user_package_home=settings.destination_dir
29 | )
30 |
31 | def export_files(self):
32 | # breakpoint() self.results
33 | exports = []
34 | for group_name, group_results in self.results.items():
35 | for sample_id, sample_results in group_results.items():
36 | export_dir = self.paths.results_dir / group_name / sample_id
37 | export_paths = ExportPathSettings(results_dir=export_dir)
38 | try:
39 | raw_data_spectra_plot(
40 | sample_results["fit_results"], export_paths=export_paths
41 | )
42 | except Exception as exc:
43 | logger.error(f"Plotting error, raw_data_spectra_plot: {exc}")
44 | try:
45 | fit_spectrum_plot(
46 | sample_results["fit_results"], export_paths=export_paths
47 | )
48 | except Exception as exc:
49 | logger.error(f"plotting error fit_spectrum_plot: {exc}")
50 |                     raise
51 | exports.append(
52 | {
53 | "sample": sample_results["fit_results"],
54 | "export_paths": export_paths,
55 | }
56 | )
57 | return exports
58 |
--------------------------------------------------------------------------------
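A sketch of running the export step above manually: the delegator is run with export disabled and its results are then handed to ExportManager. RunModes.EXAMPLES is an assumption used only for illustration.

    from raman_fitting.config.path_settings import RunModes
    from raman_fitting.delegating.main_delegator import MainDelegator
    from raman_fitting.exports.exporter import ExportManager

    # run the fits without the built-in export, then export through ExportManager
    delegator = MainDelegator(run_mode=RunModes.EXAMPLES, export=False)
    exports = ExportManager(RunModes.EXAMPLES, delegator.results).export_files()
    print(len(exports), "sample result sets exported")
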
/src/raman_fitting/exports/file_table.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from raman_fitting.models.spectrum import SpectrumData
4 |
5 |
6 | def raw_data_spectra_export(spectra: List[SpectrumData]):
7 | try:
8 | for spec in spectra:
9 | wnxl_outpath_spectra = spec.mean_info.DestRaw.unique()[0].joinpath(
10 | f"spectra_{spec.sIDmean_col}_{spec.regionname}.xlsx"
11 | )
12 | spec.mean_spec.to_excel(wnxl_outpath_spectra)
13 |
14 | _0_spec = spectra[0]
15 | wnxl_outpath_info = _0_spec.mean_info.DestRaw.unique()[0].joinpath(
16 | f"info_{_0_spec.sIDmean_col}.xlsx"
17 | )
18 | _0_spec.mean_info.to_excel(wnxl_outpath_info)
19 | except Exception as e:
20 | print("no extra Raw Data plots: {0}".format(e))
21 |
22 |
23 | def export_xls_from_spec(res_peak_spec):
24 | try:
25 | res_peak_spec.FitComponents.to_excel(
26 | res_peak_spec.extrainfo["DestFittingModel"].with_suffix(".xlsx"),
27 | index=False,
28 | )
29 |
30 | except Exception as e:
31 | print("Error export_xls_from_spec", e)
32 |
--------------------------------------------------------------------------------
/src/raman_fitting/exports/plot_formatting.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Apr 28 15:08:26 2021
5 |
6 | @author: zmg
7 | """
8 |
9 | from collections import namedtuple
10 | from typing import Sequence, Tuple
11 |
12 | from raman_fitting.models.splitter import RegionNames
13 |
14 | import matplotlib.pyplot as plt
15 | from lmfit import Model as LMFitModel
16 |
17 | from loguru import logger
18 |
19 |
20 | CMAP_OPTIONS_DEFAULT = ("Dark2", "tab20")
21 | DEFAULT_COLOR = (0.4, 0.4, 0.4, 1.0)
22 | COLOR_BLACK = (0, 0, 0, 1) # black as fallback default color
23 |
24 | ModelValidation = namedtuple("ModelValidation", "valid peak_group model_inst message")
25 |
26 |
27 | PLOT_REGION_AXES = {
28 | RegionNames.full: (0, 0),
29 | RegionNames.low: (0, 1),
30 | RegionNames.first_order: (0, 2),
31 | RegionNames.mid: (1, 1),
32 | RegionNames.second_order: (1, 2),
33 | RegionNames.normalization: (1, 0),
34 | }
35 |
36 |
37 | class PeakValidationWarning(UserWarning):
38 | pass
39 |
40 |
41 | class NotFoundAnyModelsWarning(PeakValidationWarning):
42 | pass
43 |
44 |
45 | class CanNotInitializeModelWarning(PeakValidationWarning):
46 | pass
47 |
48 |
49 | def get_cmap_list(
50 | length: int,
51 | cmap_options: Tuple = CMAP_OPTIONS_DEFAULT,
52 | default_color: Tuple = DEFAULT_COLOR,
53 | ) -> Tuple | None:
54 | lst = list(range(length))
55 | if not lst:
56 | return None
57 |
58 | # set fallback color from class
59 |     if isinstance(default_color, tuple):
60 |         if len(default_color) == 4:
61 |             # fallback cmap, may be replaced below from the cmap_options
62 |             cmap = [default_color for _ in lst]
63 | elif default_color is None:
64 | cmap = [DEFAULT_COLOR for _ in lst]
65 | else:
66 | raise ValueError(f"default color is not tuple but {type(default_color)}")
67 |
68 | # set cmap colors from cmap options
69 | if cmap_options:
70 | try:
71 | pltcmaps = [plt.get_cmap(cmap) for cmap in cmap_options]
72 |             # take the shortest colormap that still has enough colors
73 | cmap = min(
74 | [i for i in pltcmaps if len(lst) <= len(i.colors)],
75 | key=lambda x: len(x.colors),
76 | default=cmap,
77 | )
78 |             # if successful
79 | if "ListedColormap" in str(type(cmap)):
80 | cmap = cmap.colors
81 |
82 | except Exception as exc:
83 | logger.warning(f"get_cmap_list error setting cmap colors:{exc}")
84 |
85 | return cmap
86 |
87 |
88 | def assign_colors_to_peaks(selected_models: Sequence[LMFitModel]) -> dict:
89 | cmap_get = get_cmap_list(len(selected_models))
90 | annotated_models = {}
91 | for n, peak in enumerate(selected_models):
92 | color = ", ".join([str(i) for i in cmap_get[n]])
93 | lenpars = len(peak.param_names)
94 | res = {"index": n, "color": color, "lenpars": lenpars, "peak": peak}
95 | annotated_models[peak.prefix] = res
96 | return annotated_models
97 |
98 |
99 | def __repr__(self):
100 | _repr = "Validated Peak model collection"
101 | if self.selected_models:
102 | _selmods = f", {len(self.selected_models)} models from: " + "\n\t- "
103 | _repr += _selmods
104 | _joinmods = "\n\t- ".join(
105 | [f"{i.peak_group}: {i.model_inst} \t" for i in self.selected_models]
106 | )
107 | _repr += _joinmods
108 | else:
109 | _repr += ", empty selected models"
110 | return _repr
111 |
--------------------------------------------------------------------------------
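A small sketch of the color helpers above; the five GaussianModel peaks are dummy models created only to give assign_colors_to_peaks something to annotate.

    from lmfit.models import GaussianModel

    from raman_fitting.exports.plot_formatting import (
        assign_colors_to_peaks,
        get_cmap_list,
    )

    peaks = [GaussianModel(prefix=f"p{i}_") for i in range(5)]
    colors = get_cmap_list(len(peaks))
    annotated = assign_colors_to_peaks(peaks)
    print(annotated["p0_"]["color"], annotated["p0_"]["lenpars"])
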
/src/raman_fitting/exports/plotting_fit_results.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | import matplotlib
4 | import matplotlib.pyplot as plt
5 | from matplotlib import gridspec
6 | from matplotlib.axes import Axes
7 |
8 | from matplotlib.text import Text
9 | from matplotlib.ticker import AutoMinorLocator
10 |
11 | from raman_fitting.imports.samples.models import SampleMetaData
12 | from raman_fitting.models.fit_models import SpectrumFitModel
13 |
14 |
15 | from raman_fitting.config.path_settings import ExportPathSettings
16 | from raman_fitting.models.splitter import RegionNames
17 | from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult
18 |
19 | from loguru import logger
20 |
21 |
22 | matplotlib.rcParams.update({"font.size": 14})
23 | FIT_REPORT_MIN_CORREL = 0.7
24 |
25 |
26 | def fit_spectrum_plot(
27 | aggregated_spectra: Dict[RegionNames, AggregatedSampleSpectrumFitResult],
28 | export_paths: ExportPathSettings | None = None,
29 | plot_annotation=True,
30 | plot_residuals=True,
31 | ): # pragma: no cover
32 | first_order = aggregated_spectra[RegionNames.first_order]
33 | second_order = aggregated_spectra[RegionNames.second_order]
34 |
35 | sources = first_order.aggregated_spectrum.sources
36 | sample = sources[0].file_info.sample
37 | second_model_name = "2nd_4peaks"
38 | second_model = second_order.fit_model_results.get(second_model_name)
39 | for first_model_name, first_model in first_order.fit_model_results.items():
40 | prepare_combined_spectrum_fit_result_plot(
41 | first_model,
42 | second_model,
43 | sample,
44 | export_paths,
45 | plot_annotation=plot_annotation,
46 | plot_residuals=plot_residuals,
47 | )
48 |
49 |
50 | def prepare_combined_spectrum_fit_result_plot(
51 | first_model: SpectrumFitModel,
52 | second_model: SpectrumFitModel,
53 | sample: SampleMetaData,
54 | export_paths: ExportPathSettings,
55 | plot_annotation=True,
56 | plot_residuals=True,
57 | ):
58 | plt.figure(figsize=(28, 24))
59 | gs = gridspec.GridSpec(4, 1, height_ratios=[4, 1, 4, 1])
60 | ax = plt.subplot(gs[0])
61 | ax_res = plt.subplot(gs[1])
62 | ax.set_title(f"{sample.id}")
63 |
64 | first_model_name = first_model.model.name
65 |
66 | fit_plot_first(ax, ax_res, first_model, plot_residuals=plot_residuals)
67 | _bbox_artists = None
68 | if plot_annotation:
69 | annotate_report_first = prepare_annotate_fit_report_first(
70 | ax, first_model.fit_result
71 | )
72 | _bbox_artists = (annotate_report_first,)
73 |
74 | if second_model is not None:
75 | ax2nd = plt.subplot(gs[2])
76 | ax2nd_res = plt.subplot(gs[3])
77 | fit_plot_second(ax2nd, ax2nd_res, second_model, plot_residuals=plot_residuals)
78 | if plot_annotation:
79 | annotate_report_second = prepare_annotate_fit_report_second(
80 | ax2nd, second_model.fit_result
81 | )
82 | if annotate_report_second is not None:
83 | _bbox_artists = (annotate_report_first, annotate_report_second)
84 |
85 | # set axes labels and legend
86 | set_axes_labels_and_legend(ax)
87 |
88 | plot_special_si_components(ax, first_model)
89 | if export_paths is not None:
90 | savepath = export_paths.plots.joinpath(f"Model_{first_model_name}").with_suffix(
91 | ".png"
92 | )
93 | plt.savefig(
94 | savepath,
95 | dpi=100,
96 | bbox_extra_artists=_bbox_artists,
97 | bbox_inches="tight",
98 | )
99 | logger.debug(f"Plot saved to {savepath}")
100 | plt.close()
101 |
102 |
103 | def fit_plot_first(
104 | ax, ax_res, first_model: SpectrumFitModel, plot_residuals: bool = True
105 | ) -> None:
106 | first_result = first_model.fit_result
107 | first_components = first_model.fit_result.components
108 | first_eval_comps = first_model.fit_result.eval_components()
109 | first_model_name = first_model.model.name
110 |
111 | ax.grid(True, "both")
112 | ax_res.grid(True, "both")
113 | ax.get_yaxis().set_tick_params(direction="in")
114 | ax.get_xaxis().set_tick_params(direction="in")
115 |
116 | ax.xaxis.set_minor_locator(AutoMinorLocator(2))
117 | ax.yaxis.set_minor_locator(AutoMinorLocator(2))
118 | ax.tick_params(which="both", direction="in")
119 | ax.set_facecolor("oldlace")
120 | ax_res.set_facecolor("oldlace")
121 | ax.plot(
122 | first_model.spectrum.ramanshift,
123 | first_result.best_fit,
124 | label=first_model_name,
125 | lw=3,
126 | c="r",
127 | )
128 | ax.plot(
129 | first_model.spectrum.ramanshift,
130 | first_result.data,
131 | label="Data",
132 | lw=3,
133 | c="grey",
134 | alpha=0.8,
135 | )
136 |
137 | if plot_residuals:
138 | ax_res.plot(
139 | first_model.spectrum.ramanshift,
140 | first_result.residual,
141 | label="Residual",
142 | lw=3,
143 | c="k",
144 | alpha=0.8,
145 | )
146 |
147 | for _component in first_components: # automatic color cycle 'cyan' ...
148 | peak_name = _component.prefix.rstrip("_")
149 | ax.plot(
150 | first_model.spectrum.ramanshift,
151 | first_eval_comps[_component.prefix],
152 | ls="--",
153 | lw=4,
154 | label=peak_name,
155 | )
156 | center_col = _component.prefix + "center"
157 | ax.annotate(
158 | f"{peak_name}:\n {first_result.best_values[center_col]:.0f}",
159 | xy=(
160 | first_result.best_values[center_col] * 0.97,
161 | 0.7 * first_result.params[_component.prefix + "height"].value,
162 | ),
163 | xycoords="data",
164 | )
165 |
166 |
167 | def fit_plot_second(
168 | ax2nd, ax2nd_res, second_model: SpectrumFitModel, plot_residuals: bool = True
169 | ) -> None:
170 | if second_model:
171 | second_result = second_model.fit_result
172 | second_components = second_model.fit_result.components
173 | second_eval_comps = second_model.fit_result.eval_components()
174 | second_model_name = second_model.model.name
175 | else:
176 | second_components = []
177 | second_result = None
178 | second_model_name = None
179 | second_eval_comps = None
180 | if second_model:
181 | ax2nd.grid(True)
182 | ax2nd_res.grid(True)
183 | ax2nd.xaxis.set_minor_locator(AutoMinorLocator(2))
184 | ax2nd.yaxis.set_minor_locator(AutoMinorLocator(2))
185 | ax2nd.tick_params(which="both", direction="in")
186 | ax2nd.set_facecolor("oldlace")
187 | ax2nd_res.set_facecolor("oldlace")
188 | if second_result is not None:
189 | ax2nd.plot(
190 | second_model.spectrum.ramanshift,
191 | second_result.best_fit,
192 | label=second_model_name,
193 | lw=3,
194 | c="r",
195 | )
196 | ax2nd.plot(
197 | second_model.spectrum.ramanshift,
198 | second_result.data,
199 | label="Data",
200 | lw=3,
201 | c="grey",
202 | alpha=0.5,
203 | )
204 | if plot_residuals:
205 | ax2nd_res.plot(
206 | second_model.spectrum.ramanshift,
207 | second_result.residual,
208 | label="Residual",
209 | lw=3,
210 | c="k",
211 | alpha=0.8,
212 | )
213 |
214 | for _component in second_components: # automatic color cycle 'cyan' ...
215 | if second_eval_comps is None:
216 | continue
217 |
218 | peak_name = _component.prefix.rstrip("_")
219 | ax2nd.plot(
220 | second_model.spectrum.ramanshift,
221 | second_eval_comps[_component.prefix],
222 | ls="--",
223 | lw=4,
224 | label=peak_name,
225 | )
226 | center_col = _component.prefix + "center"
227 | ax2nd.annotate(
228 | f"{peak_name}\n {second_result.best_values[center_col]:.0f}",
229 | xy=(
230 | second_result.best_values[center_col] * 0.97,
231 | 0.8 * second_result.params[_component.prefix + "height"].value,
232 | ),
233 | xycoords="data",
234 | )
235 | ax2nd.set_ylim(-0.02, second_result.data.max() * 1.5)
236 |
237 | set_axes_labels_and_legend(ax2nd)
238 |
239 |
240 | def prepare_annotate_fit_report_second(ax2nd, second_result) -> Text:
241 | props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
242 | annotate_report_second = ax2nd.text(
243 | 1.01,
244 | 0.7,
245 | second_result.fit_report(min_correl=FIT_REPORT_MIN_CORREL),
246 | transform=ax2nd.transAxes,
247 | fontsize=11,
248 | verticalalignment="top",
249 | bbox=props,
250 | )
251 |
252 | return annotate_report_second
253 |
254 |
255 | def prepare_annotate_fit_report_first(ax, first_result):
256 | fit_report = first_result.fit_report(min_correl=FIT_REPORT_MIN_CORREL)
257 |     # insert a line break after the D3 prefix so the report box stays narrow
258 |     fit_report = fit_report.replace("prefix='D3_'", "prefix='D3_' \n")
259 | props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
260 |
261 | annotate_report_first = ax.text(
262 | 1.01,
263 | 1,
264 | fit_report,
265 | transform=ax.transAxes,
266 | fontsize=11,
267 | verticalalignment="top",
268 | bbox=props,
269 | )
270 | return annotate_report_first
271 |
272 |
273 | def plot_special_si_components(ax, first_model):
274 | first_result = first_model.fit_result
275 | si_components = filter(lambda x: x.prefix.startswith("Si"), first_result.components)
276 | first_eval_comps = first_model.fit_result.eval_components()
277 | for si_comp in si_components:
278 | si_result = si_comp
279 | ax.plot(
280 | first_model.spectrum.ramanshift,
281 | first_eval_comps[si_comp.prefix],
282 | "b--",
283 | lw=4,
284 | label="Si_substrate",
285 | )
286 | if si_result.params[si_comp.prefix + "fwhm"] > 1:
287 | ax.annotate(
288 | "Si_substrate:\n %.0f" % si_result.params["Si1_center"].value,
289 | xy=(
290 | si_result.params["Si1_center"].value * 0.97,
291 | 0.8 * si_result.params["Si1_height"].value,
292 | ),
293 | xycoords="data",
294 | )
295 |
296 |
297 | def set_axes_labels_and_legend(ax: Axes):
298 | # set axes labels and legend
299 | ax.legend(loc=1)
300 | ax.set_xlabel("Raman shift (cm$^{-1}$)")
301 | ax.set_ylabel("normalized I / a.u.")
302 |
--------------------------------------------------------------------------------
/src/raman_fitting/exports/plotting_raw_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Jan 29 14:49:50 2020
5 |
6 | @author: DW
7 | """
8 |
9 | from typing import Dict
10 |
11 |
12 | import matplotlib
13 | import matplotlib.pyplot as plt
14 |
15 | from raman_fitting.models.splitter import RegionNames
16 | from raman_fitting.config.path_settings import (
17 | CLEAN_SPEC_REGION_NAME_PREFIX,
18 | ExportPathSettings,
19 | )
20 | from raman_fitting.exports.plot_formatting import PLOT_REGION_AXES
21 | from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult
22 |
23 | from loguru import logger
24 |
25 | matplotlib.rcParams.update({"font.size": 14})
26 |
27 |
28 | def raw_data_spectra_plot(
29 | aggregated_spectra: Dict[RegionNames, AggregatedSampleSpectrumFitResult],
30 | export_paths: ExportPathSettings,
31 | ): # pragma: no cover
32 | if not aggregated_spectra:
33 | return
34 | # breakpoint()
35 | sources = list(aggregated_spectra.values())[0].aggregated_spectrum.sources
36 | sample_id = "-".join(set(i.file_info.sample.id for i in sources))
37 |
38 | destfile = export_paths.plots.joinpath(f"{sample_id}_mean.png")
39 | destfile.parent.mkdir(exist_ok=True, parents=True)
40 |
41 | mean_fmt = dict(c="k", alpha=0.7, lw=3)
42 | sources_fmt = dict(alpha=0.4, lw=2)
43 |
44 | _, ax = plt.subplots(2, 3, figsize=(18, 12))
45 |
46 | for spec_source in sources:
47 | for (
48 | source_region_label,
49 | source_region,
50 | ) in spec_source.processed.clean_spectrum.spec_regions.items():
51 | _source_region_name = source_region.region_name.split(
52 | CLEAN_SPEC_REGION_NAME_PREFIX
53 | )[-1]
54 | if _source_region_name not in PLOT_REGION_AXES:
55 | continue
56 | ax_ = ax[PLOT_REGION_AXES[_source_region_name]]
57 | ax_.plot(
58 | source_region.ramanshift,
59 | source_region.intensity,
60 | label=f"{spec_source.file_info.file.stem}",
61 | **sources_fmt,
62 | )
63 | ax_.set_title(_source_region_name)
64 | if _source_region_name in aggregated_spectra:
65 | mean_spec = aggregated_spectra[
66 | _source_region_name
67 | ].aggregated_spectrum.spectrum
68 | # plot the mean aggregated spectrum
69 | ax_.plot(
70 | mean_spec.ramanshift,
71 | mean_spec.intensity,
72 | label=mean_spec.label,
73 | **mean_fmt,
74 | )
75 |
76 | if _source_region_name == RegionNames.full:
77 | ax_.legend(fontsize=10)
78 |
79 | plt.suptitle(f"Mean {sample_id}", fontsize=16)
80 | plt.savefig(
81 | destfile,
82 | dpi=300,
83 | bbox_inches="tight",
84 | )
85 | plt.close()
86 | logger.debug(f"raw_data_spectra_plot saved:\n{destfile}")
87 |
--------------------------------------------------------------------------------
/src/raman_fitting/imports/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/imports/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/imports/collector.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List, Collection, Tuple
3 | import logging
4 |
5 | from .models import RamanFileInfo
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | def collect_raman_file_infos(
11 | raman_files: Collection[Path],
12 | ) -> Tuple[List[RamanFileInfo], List[Path]]:
13 | pp_collection = []
14 | _files = []
15 | _failed_files = []
16 | for file in raman_files:
17 | _files.append(file)
18 | try:
19 | pp_res = RamanFileInfo(**{"file": file})
20 | pp_collection.append(pp_res)
21 | except Exception as exc:
22 | logger.warning(
23 | f"{__name__} collect_raman_file_infos unexpected error for calling RamanFileInfo on\n{file}.\n{exc}"
24 | )
25 | _failed_files.append({"file": file, "error": exc})
26 | if _failed_files:
27 | logger.warning(
28 | f"{__name__} collect_raman_file_infos failed for {len(_failed_files)}."
29 | )
30 |
31 | return pp_collection, _files
32 |
--------------------------------------------------------------------------------
/src/raman_fitting/imports/files/file_finder.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import logging
3 | from pathlib import Path
4 | from pydantic import BaseModel, DirectoryPath, Field, model_validator
5 |
6 | logger = logging.getLogger(__name__)
7 |
8 |
9 | class FileFinder(BaseModel):
10 | directory: DirectoryPath
11 | suffixes: List[str] = Field([".txt"])
12 | files: List[Path] = Field(None, init_var=False)
13 |
14 | @model_validator(mode="after")
15 | def parse_metadata_from_filepath(self) -> "FileFinder":
16 | if self.files is None:
17 | files = find_files(self.directory, self.suffixes)
18 | self.files = files
19 |
20 | return self
21 |
22 |
23 | def find_files(directory: Path, suffixes: List[str]) -> List[Path]:
24 | """
25 |     Creates a list of all Raman data files found under the given directory, which are used in the creation of the index.
26 | """
27 |
28 | raman_files = []
29 |
30 | for suffix in suffixes:
31 | files = list(directory.rglob(f"*{suffix}"))
32 | raman_files += files
33 |
34 | if not raman_files:
35 | logger.warning(
36 |             f"find_files warning: the chosen data file dir was empty.\n{directory}\nPlease choose another directory which contains your data files."
37 | )
38 | logger.info(
39 | f"find_files {len(raman_files)} files were found in the chosen data dir:\n\t{directory}"
40 | )
41 | return raman_files
42 |
--------------------------------------------------------------------------------
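A usage sketch of the FileFinder model above; "datafiles" stands in for an existing directory with .txt spectra (the DirectoryPath validation requires it to exist).

    from raman_fitting.imports.files.file_finder import FileFinder

    finder = FileFinder(directory="datafiles", suffixes=[".txt"])
    print(f"found {len(finder.files)} files")
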
/src/raman_fitting/imports/files/file_indexer.py:
--------------------------------------------------------------------------------
1 | """Indexer for raman data files"""
2 |
3 | from itertools import filterfalse, groupby
4 | from pathlib import Path
5 | from typing import List, Sequence, TypeAlias
6 |
7 | from loguru import logger
8 | from pydantic import (
9 | BaseModel,
10 | ConfigDict,
11 | Field,
12 | FilePath,
13 | NewPath,
14 | model_validator,
15 | )
16 | from raman_fitting.config import settings
17 | from raman_fitting.imports.collector import collect_raman_file_infos
18 | from raman_fitting.imports.files.utils import (
19 | load_dataset_from_file,
20 | write_dataset_to_file,
21 | )
22 | from raman_fitting.imports.models import RamanFileInfo
23 | from tablib import Dataset
24 |
25 | from raman_fitting.imports.spectrum import SPECTRUM_FILETYPE_PARSERS
26 |
27 | RamanFileInfoSet: TypeAlias = Sequence[RamanFileInfo]
28 |
29 |
30 | class RamanFileIndex(BaseModel):
31 | model_config = ConfigDict(arbitrary_types_allowed=True)
32 |
33 | index_file: NewPath | FilePath | None = Field(None, validate_default=False)
34 | raman_files: RamanFileInfoSet | None = Field(None)
35 | dataset: Dataset | None = Field(None)
36 | force_reindex: bool = Field(False, validate_default=False)
37 | persist_to_file: bool = Field(True, validate_default=False)
38 |
39 | @model_validator(mode="after")
40 | def read_or_load_data(self) -> "RamanFileIndex":
41 | if not any([self.index_file, self.raman_files, self.dataset]):
42 |             raise ValueError("At least one of index_file, raman_files or dataset must be provided.")
43 |
44 | reload_from_file = validate_reload_from_index_file(
45 | self.index_file, self.force_reindex
46 | )
47 | if reload_from_file:
48 | self.dataset = load_dataset_from_file(self.index_file)
49 | if not self.raman_files and self.dataset:
50 | self.raman_files = parse_dataset_to_index(self.dataset)
51 | return self
52 |
53 | if self.raman_files is not None:
54 | dataset_rf = cast_raman_files_to_dataset(self.raman_files)
55 | if self.dataset is not None:
56 | assert (
57 | dataset_rf == self.dataset
58 | ), "Both dataset and raman_files provided and they are different."
59 | self.dataset = dataset_rf
60 |
61 | if self.dataset is not None:
62 | self.raman_files = parse_dataset_to_index(self.dataset)
63 |
64 | if self.raman_files is None and self.dataset is None:
65 | raise ValueError(
66 | "Index error, both raman_files and dataset are not provided."
67 | )
68 |
69 | if self.persist_to_file and self.index_file is not None:
70 | write_dataset_to_file(self.index_file, self.dataset)
71 |
72 | return self
73 |
74 |
75 | def validate_reload_from_index_file(
76 | index_file: Path | None, force_reindex: bool
77 | ) -> bool:
78 | if index_file is None:
79 | logger.debug(
80 | "Index file not provided, index will not be reloaded or persisted."
81 | )
82 | return False
83 | if index_file.exists() and not force_reindex:
84 | return True
85 | elif force_reindex:
86 | logger.warning(
87 |             f"Index file {index_file} exists and will be overwritten."
88 | )
89 | else:
90 | logger.info(
91 |             "Index file does not exist but a reload from it was requested."
92 | )
93 | return False
94 |
95 |
96 | def cast_raman_files_to_dataset(raman_files: RamanFileInfoSet) -> Dataset:
97 | headers = list(RamanFileInfo.model_fields.keys())
98 | data = Dataset(headers=headers)
99 | for file in raman_files:
100 | data.append(file.model_dump(mode="json").values())
101 | return data
102 |
103 |
104 | def parse_dataset_to_index(dataset: Dataset) -> RamanFileInfoSet:
105 | raman_files = []
106 | for row in dataset:
107 | row_data = dict(zip(dataset.headers, row))
108 | raman_files.append(RamanFileInfo(**row_data))
109 | return raman_files
110 |
111 |
112 | class IndexSelector(BaseModel):
113 | raman_files: Sequence[RamanFileInfo]
114 | sample_ids: List[str] = Field(default_factory=list)
115 | sample_groups: List[str] = Field(default_factory=list)
116 | selection: Sequence[RamanFileInfo] = Field(default_factory=list)
117 |
118 | @model_validator(mode="after")
119 | def make_and_set_selection(self) -> "IndexSelector":
120 | rf_index = self.raman_files
121 | if not any([self.sample_groups, self.sample_ids]):
122 | self.selection = rf_index
123 | logger.debug(
124 | f"{self.__class__.__qualname__} selected {len(self.selection)} of {len(rf_index)}. "
125 | )
126 | return self
127 | else:
128 | rf_index_groups = list(
129 | filter(lambda x: x.sample.group in self.sample_groups, rf_index)
130 | )
131 | _pre_selected_samples = {i.sample.id for i in rf_index_groups}
132 | selected_sample_ids = filterfalse(
133 | lambda x: x in _pre_selected_samples, self.sample_ids
134 | )
135 | rf_index_samples = list(
136 | filter(lambda x: x.sample.id in selected_sample_ids, rf_index)
137 | )
138 | rf_selection_index = rf_index_groups + rf_index_samples
139 | self.selection = rf_selection_index
140 | logger.debug(
141 |                 f"{self.__class__.__qualname__} selected {len(self.selection)} of {len(rf_index)}. "
142 | )
143 | return self
144 |
145 |
146 | def groupby_sample_group(index: RamanFileInfoSet):
147 |     """Generator over sample groups, yields the group name and the group of index entries."""
148 | grouper = groupby(index, key=lambda x: x.sample.group)
149 | return grouper
150 |
151 |
152 | def groupby_sample_id(index: RamanFileInfoSet):
153 |     """Generator over sample IDs, yields the sample ID and the group of index entries for that sample."""
154 | grouper = groupby(index, key=lambda x: x.sample.id)
155 | return grouper
156 |
157 |
158 | def iterate_over_groups_and_sample_id(index: RamanFileInfoSet):
159 | for grp_name, grp in groupby_sample_group(index):
160 |         for sample_id, sgrp in groupby_sample_id(grp):
161 | yield grp_name, grp, sample_id, sgrp
162 |
163 |
164 | def select_index_by_sample_groups(index: RamanFileInfoSet, sample_groups: List[str]):
165 | return filter(lambda x: x.sample.group in sample_groups, index)
166 |
167 |
168 | def select_index_by_sample_ids(index: RamanFileInfoSet, sample_ids: List[str]):
169 | return filter(lambda x: x.sample.id in sample_ids, index)
170 |
171 |
172 | def select_index(
173 | index: RamanFileInfoSet, sample_groups: List[str], sample_ids: List[str]
174 | ):
175 | group_selection = list(select_index_by_sample_groups(index, sample_groups))
176 | sample_selection = list(select_index_by_sample_ids(index, sample_ids))
177 | selection = group_selection + sample_selection
178 | return selection
179 |
180 |
181 | def collect_raman_file_index_info(
182 | raman_files: Sequence[Path] | None = None, **kwargs
183 | ) -> RamanFileInfoSet:
184 | """loops over the files and scrapes the index data from each file"""
185 |     raman_files = list(raman_files) if raman_files is not None else []
186 | total_files = []
187 | dirs = [i for i in raman_files if i.is_dir()]
188 | files = [i for i in raman_files if i.is_file()]
189 | total_files += files
190 | suffixes = [i.lstrip(".") for i in SPECTRUM_FILETYPE_PARSERS.keys()]
191 | for d1 in dirs:
192 | paths = [path for i in suffixes for path in d1.glob(f"*.{i}")]
193 | total_files += paths
194 | index, files = collect_raman_file_infos(total_files, **kwargs)
195 | logger.info(f"successfully made index {len(index)} from {len(files)} files")
196 | return index
197 |
198 |
199 | def initialize_index_from_source_files(
200 | files: Sequence[Path] | None = None,
201 | index_file: Path | None = None,
202 | force_reindex: bool = False,
203 | ) -> RamanFileIndex:
204 | raman_files = collect_raman_file_index_info(raman_files=files)
205 | # breakpoint()
206 | raman_index = RamanFileIndex(
207 | index_file=index_file, raman_files=raman_files, force_reindex=force_reindex
208 | )
209 | logger.info(
210 | f"index_delegator index prepared with len {len(raman_index.raman_files)}"
211 | )
212 | return raman_index
213 |
214 |
215 | def main():
216 | """test run for indexer"""
217 | index_file = settings.destination_dir.joinpath("index.csv")
218 | raman_files = collect_raman_file_index_info()
219 | try:
220 |         index_data = {"index_file": index_file, "raman_files": raman_files}
221 | raman_index = RamanFileIndex(**index_data)
222 | logger.debug(f"Raman Index len: {len(raman_index.dataset)}")
223 | select_index(raman_index.raman_files, sample_groups=["DW"], sample_ids=["DW38"])
224 | except Exception as e:
225 | logger.error(f"Raman Index error: {e}")
226 | raman_index = None
227 |
228 | return raman_index
229 |
230 |
231 | if __name__ == "__main__":
232 | main()
233 |
--------------------------------------------------------------------------------
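A sketch of building and filtering the index with the helpers above. The paths are hypothetical; the "DW" sample group mirrors the one used in main().

    from pathlib import Path

    from raman_fitting.imports.files.file_indexer import (
        initialize_index_from_source_files,
        select_index,
    )

    files = sorted(Path("datafiles").glob("*.txt"))
    index = initialize_index_from_source_files(
        files=files,
        index_file=Path("raman_fitting_index.csv"),
        force_reindex=True,
    )
    selection = select_index(index.raman_files, sample_groups=["DW"], sample_ids=[])
    print(len(selection), "entries selected")
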
/src/raman_fitting/imports/files/index_funcs.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from pathlib import Path
4 |
5 | from raman_fitting.imports.spectrum.datafile_parsers import load_dataset_from_file
6 |
7 | from loguru import logger
8 |
9 |
10 | def get_dtypes_filepath(index_file):
11 | _dtypes_filepath = index_file.with_name(
12 | index_file.stem + "_dtypes" + index_file.suffix
13 | )
14 | return _dtypes_filepath
15 |
16 |
17 | def export_index(index, index_file):
18 | """saves the index to a defined Index file"""
19 | if index.empty:
20 | logger.info(f"{__name__} Empty index not exported")
21 | return
22 |
23 | if not index_file.parent.exists():
24 | logger.info(f"{__name__} created parent dir: {index_file.parent}")
25 | index_file.parent.mkdir(exist_ok=True, parents=True)
26 |
27 | index.to_csv(index_file)
28 |
29 | _dtypes = index.dtypes.to_frame("dtypes")
30 | _dtypes.to_csv(get_dtypes_filepath(index_file))
31 |
32 | logger.info(
33 |         f"{__name__} Successfully exported Raman Index file to:\n\t{index_file}\nwith len({len(index)})."
34 | )
35 |
36 |
37 | def load_index(index_file):
38 | """loads the index from from defined Index file"""
39 | if not index_file.exists():
40 | logger.error(
41 | f"Error in load_index: {index_file} does not exists, starting reload index ... "
42 | )
43 | return
44 |
45 | try:
46 | index = load_dataset_from_file(index_file)
47 |
48 | logger.info(
49 |             f"Successfully imported Raman Index file from {index_file}, with len({len(index)})"
50 | )
51 |         if not len(index):
52 |             logger.error(
53 |                 f"""Error in load_index from {index_file},
54 |                 \nloaded index is empty
55 |                 \n starting reload index ... """
56 |             )
57 |         return index
58 | except Exception as e:
59 | logger.error(
60 | f"Error in load_index from {index_file},\n{e}\n starting reload index ... "
61 | )
62 |
63 |
64 | def index_selection(index, **kwargs):
65 | """
66 | Special selector on the index DataFrame
67 |
68 | Parameters
69 | -------
70 |
71 | index
72 | pd.DataFrame containing the index of files
73 | should contains columns that are given in index_file_sample_cols and index_file_stat_cols
74 | default_selection str
75 | all or '' for empty default
76 | kwargs
77 | checks for keywords suchs as samplegroups, sampleIDs, extra
78 | meant for cli commands
79 |
80 | Returns
81 | -------
82 | index_selection
83 | pd.DataFrame with a selection from the given input parameter index
84 | default returns empty DataFrame
85 |
86 | """
87 | if index is None:
88 | return
89 |
90 | if not kwargs:
91 | return index
92 |
93 | default_selection = kwargs.get("default_selection", "all")
94 | if "normal" not in kwargs.get("run_mode", default_selection):
95 | default_selection = "all"
96 | index_selection = None
97 | logger.info(
98 | f"starting index selection from index({len(index)}) with:\n default selection: {default_selection}\n and {kwargs}"
99 | )
100 |
101 | if not index:
102 | logger.warning("index selection index arg empty")
103 | return
104 |
105 | if default_selection == "all":
106 | index_selection = index.copy()
107 |
108 | if "samplegroups" in kwargs:
109 | index = list(
110 | filter(lambda x: x.sample.group in kwargs.get("samplegroups", []), index)
111 | )
112 | if "sampleIDs" in kwargs:
113 | index = list(
114 | filter(lambda x: x.sample.id in kwargs.get("sampleIDs", []), index)
115 | )
116 |
117 | if "extra" in kwargs:
118 | runq = kwargs.get("run")
119 | if "recent" in runq:
120 | grp = index.sort_values(
121 | "FileCreationDate", ascending=False
122 | ).FileCreationDate.unique()[0]
123 |
124 | index_selection = index.loc[index.FileCreationDate == grp]
125 | index_selection = index_selection.assign(
126 | **{
127 | "DestDir": [
128 | Path(i).joinpath(grp.strftime("%Y-%m-%d"))
129 | for i in index_selection.DestDir.values
130 | ]
131 | }
132 | )
133 |
134 | logger.debug(
135 | f"finished index selection from index({len(index)}) with:\n {default_selection}\n and {kwargs}\n selection len({len(index_selection )})"
136 | )
137 |
138 | if not index_selection:
139 | logger.warning("index selection empty. exiting")
140 | sys.exit()
141 |
142 | return index_selection
143 |
144 |
145 | def test_positions(sample_group_files):
146 | if not sample_group_files:
147 | return
148 |
149 | _files = [i.file for i in sample_group_files]
150 | _positions = [i.sample.position for i in sample_group_files]
151 | if len(set(_files)) != len(set(_positions)):
152 | logger.warning(
153 | f"{sample_group_files[0].sample} Unique files and positions not matching for {sample_group_files}"
154 | )
155 | return sample_group_files
156 |
--------------------------------------------------------------------------------
/src/raman_fitting/imports/files/index_helpers.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | from pathlib import Path
3 |
4 |
5 | def get_filename_id_from_path(path: Path) -> str:
6 | """
7 | Makes the ID from a filepath
8 |
9 | Parameters
10 | ----------
11 | path : Path
12 |         The file path from which the ID is constructed.
13 |
14 | Returns
15 | -------
16 | str: which contains hash(parent+suffix)_stem of path
17 |
18 | """
19 |
20 | _parent_suffix_hash = hashlib.sha512(
21 | (str(path.parent) + path.suffix).encode("utf-8")
22 | ).hexdigest()
23 | filename_id = f"{_parent_suffix_hash}_{path.stem}"
24 | return filename_id
25 |
--------------------------------------------------------------------------------
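A short sketch of the filename-ID helper above; the path is hypothetical, and only the parent directory and suffix feed into the hash.

    from pathlib import Path

    from raman_fitting.imports.files.index_helpers import get_filename_id_from_path

    filename_id = get_filename_id_from_path(Path("datafiles/sample1_pos1.txt"))
    print(filename_id.endswith("_sample1_pos1"))  # True: "<sha512 of parent+suffix>_<stem>"
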
/src/raman_fitting/imports/files/metadata.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Dict
3 | from datetime import date
4 | import datetime
5 | from typing import Any
6 |
7 |
8 | from pydantic import (
9 | BaseModel,
10 | FilePath,
11 | PastDatetime,
12 | )
13 |
14 |
15 | class FileMetaData(BaseModel):
16 | file: FilePath
17 | creation_date: date
18 | creation_datetime: PastDatetime
19 | modification_date: date
20 | modification_datetime: PastDatetime
21 | size: int
22 |
23 |
24 | def get_file_metadata(filepath: Path) -> Dict[str, Any]:
25 | """converting creation time and last mod time to datetime object"""
26 | fstat = filepath.stat()
27 | c_t = fstat.st_ctime
28 | m_t = fstat.st_mtime
29 | c_tdate, m_tdate = c_t, m_t
30 |
31 | try:
32 | c_t = datetime.datetime.fromtimestamp(fstat.st_ctime)
33 | m_t = datetime.datetime.fromtimestamp(fstat.st_mtime)
34 | c_tdate = c_t.date()
35 | m_tdate = m_t.date()
36 | except OverflowError:
37 | pass
38 | except OSError:
39 | pass
40 | ret = {
41 | "file": filepath,
42 | "creation_date": c_tdate,
43 | "creation_datetime": c_t,
44 | "modification_date": m_tdate,
45 | "modification_datetime": m_t,
46 | "size": fstat.st_size,
47 | }
48 | return ret
49 |
--------------------------------------------------------------------------------
/src/raman_fitting/imports/files/utils.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import tablib.exceptions
4 | from tablib import Dataset
5 |
6 | from loguru import logger
7 |
8 |
9 | def write_dataset_to_file(file: Path, dataset: Dataset) -> None:
10 | if file.suffix == ".csv":
11 | with open(file, "w", newline="") as f:
12 | f.write(dataset.export("csv"))
13 | else:
14 |         with open(file, "wb") as f:
15 |             f.write(dataset.export(file.suffix.lstrip(".")))
16 | logger.debug(f"Wrote dataset {len(dataset)} to {file}")
17 |
18 |
19 | def load_dataset_from_file(file) -> Dataset:
20 | with open(file, "r", encoding="utf-8") as fh:
21 | try:
22 | imported_data = Dataset().load(fh)
23 | except tablib.exceptions.UnsupportedFormat as e:
24 | logger.warning(f"Read dataset {e} from {file}")
25 | imported_data = Dataset()
26 |
27 | logger.debug(f"Read dataset {len(imported_data)} from {file}")
28 | return imported_data
29 |
--------------------------------------------------------------------------------
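A round-trip sketch of the dataset helpers above, writing a small two-column CSV index and reading it back; the file name and rows are arbitrary.

    from pathlib import Path

    from tablib import Dataset

    from raman_fitting.imports.files.utils import (
        load_dataset_from_file,
        write_dataset_to_file,
    )

    dataset = Dataset(headers=["file", "sample"])
    dataset.append(["datafiles/sample1_pos1.txt", "sample1"])

    index_csv = Path("demo_index.csv")
    write_dataset_to_file(index_csv, dataset)
    reloaded = load_dataset_from_file(index_csv)
    print(len(reloaded), reloaded.headers)
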
/src/raman_fitting/imports/files/validators.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 |
4 | logger = logging.getLogger(__name__)
5 |
6 |
7 | def validate_filepath(filepath: Path, max_bytesize=10**6) -> Path | None:
8 | if not isinstance(filepath, (Path, str)):
9 | raise TypeError("Argument given is not Path nor str")
10 |
11 | filepath = Path(filepath)
12 |
13 | if not filepath.exists():
14 | logger.warning("File does not exist")
15 | return
16 |
17 | filesize = filepath.stat().st_size
18 | if filesize > max_bytesize:
19 | logger.warning(f"File too large ({filesize})=> skipped")
20 | return
21 | return filepath
22 |
--------------------------------------------------------------------------------
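A sketch of the filepath validator above: it returns the Path for an existing file under the size limit and None otherwise. The path is hypothetical.

    from raman_fitting.imports.files.validators import validate_filepath

    checked = validate_filepath("datafiles/sample1_pos1.txt", max_bytesize=10**6)
    if checked is None:
        print("file missing or too large, skipping")
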
/src/raman_fitting/imports/models.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pydantic import (
3 | BaseModel,
4 | FilePath,
5 | model_validator,
6 | Field,
7 | ConfigDict,
8 | )
9 |
10 | from .samples.sample_id_helpers import extract_sample_metadata_from_filepath
11 |
12 | from .files.metadata import FileMetaData, get_file_metadata
13 | from .files.index_helpers import get_filename_id_from_path
14 | from .samples.models import SampleMetaData
15 |
16 |
17 | class RamanFileInfo(BaseModel):
18 | model_config = ConfigDict(arbitrary_types_allowed=True)
19 |
20 | file: FilePath
21 | filename_id: str = Field(None, init_var=False, validate_default=False)
22 | sample: SampleMetaData | str = Field(None, init_var=False, validate_default=False)
23 | file_metadata: FileMetaData | str = Field(
24 | None, init_var=False, validate_default=False
25 | )
26 |
27 | @model_validator(mode="after")
28 | def set_filename_id(self) -> "RamanFileInfo":
29 | filename_id = get_filename_id_from_path(self.file)
30 | self.filename_id = filename_id
31 | return self
32 |
33 | @model_validator(mode="after")
34 | def parse_and_set_sample_from_file(self) -> "RamanFileInfo":
35 | sample = extract_sample_metadata_from_filepath(self.file)
36 | self.sample = sample
37 | return self
38 |
39 | @model_validator(mode="after")
40 | def parse_and_set_metadata_from_filepath(self) -> "RamanFileInfo":
41 | file_metadata = get_file_metadata(self.file)
42 | self.file_metadata = FileMetaData(**file_metadata)
43 | return self
44 |
45 | @model_validator(mode="after")
46 | def initialize_sample_and_file_from_dict(self) -> "RamanFileInfo":
47 | if isinstance(self.sample, dict):
48 | self.sample = SampleMetaData(**self.sample)
49 | elif isinstance(self.sample, str):
50 | _sample = json.loads(self.sample.replace("'", '"'))
51 | self.sample = SampleMetaData(**_sample)
52 |
53 | if isinstance(self.file_metadata, dict):
54 | self.file_metadata = FileMetaData(**self.file_metadata)
55 | elif isinstance(self.file_metadata, str):
56 | _file_metadata = json.loads(self.file_metadata.replace("'", '"'))
57 |             self.file_metadata = FileMetaData(**_file_metadata)
58 |
59 | return self
60 |
--------------------------------------------------------------------------------
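A sketch of constructing RamanFileInfo above from a single file; the path is hypothetical and must exist for the FilePath validation to pass.

    from raman_fitting.imports.models import RamanFileInfo

    info = RamanFileInfo(file="datafiles/sample1_pos1.txt")
    print(info.sample.id, info.sample.group, info.sample.position)
    print(info.file_metadata.size, info.file_metadata.creation_date)
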
/src/raman_fitting/imports/samples/models.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
3 |
4 | class SampleMetaData(BaseModel):
5 | id: str
6 | group: str
7 | position: int = 0
8 |
--------------------------------------------------------------------------------
/src/raman_fitting/imports/samples/sample_id_helpers.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple, Optional, Dict
2 | from pathlib import Path
3 |
4 | from .models import SampleMetaData
5 |
6 |
7 | def parse_string_to_sample_id_and_position(
8 | string: str, seps=("_", " ", "-")
9 | ) -> Tuple[str, int]:
10 |     """
11 |     Parser for the filenames -> finds SampleID and sample position
12 | 
13 |     Parameters
14 |     ----------
15 |     string : str
16 |         The filename stem that is parsed.
17 |     seps : tuple of str
18 |         Ordered collection of separators tried for the split,
19 |         default: ('_', ' ', '-')
20 | 
21 |     Returns
22 |     -------
23 |     tuple of (str, int)
24 |         The parsed sample ID and sample position.
25 |     """
26 |
27 | split = None
28 | first_sep_match_index = min(
29 | [n for n, i in enumerate(seps) if i in string], default=None
30 | )
31 | first_sep_match = (
32 | seps[first_sep_match_index] if first_sep_match_index is not None else None
33 | )
34 | split = string.split(first_sep_match)
35 | _lensplit = len(split)
36 |
37 |     if _lensplit == 0:
38 |         sample_id, position = string, 0
39 | elif len(split) == 1:
40 | sample_id, position = split[0], 0
41 | elif len(split) == 2:
42 | sample_id = split[0]
43 | _pos_strnum = "".join(i for i in split[1] if i.isnumeric())
44 | if _pos_strnum:
45 | position = int(_pos_strnum)
46 | else:
47 | position = split[1]
48 | elif len(split) >= 3:
49 | sample_id = "_".join(split[0:-1])
50 |         _pos_strnum = "".join(filter(str.isdigit, split[-1]))
51 |         position = int(_pos_strnum) if _pos_strnum else 0
52 | return (sample_id, position)
53 |
54 |
55 | def extract_sample_group_from_sample_id(sample_id: str, max_len=4) -> str:
56 | """adding the extra sample Group key from sample ID"""
57 |
58 | _len = len(sample_id)
59 | _maxalphakey = min(
60 | [n for n, i in enumerate(sample_id) if not str(i).isalpha()], default=_len
61 | )
62 | _maxkey = min((_len, _maxalphakey, max_len))
63 | sample_group_id = "".join([i for i in sample_id[0:_maxkey] if i.isalpha()])
64 | return sample_group_id
65 |
66 |
67 | def overwrite_sample_id_from_mapper(sample_id: str, mapper: dict) -> str:
68 | """Takes an sample_id and potentially overwrites from a mapper dict"""
69 | sample_id_map = mapper.get(sample_id)
70 | if sample_id_map is not None:
71 | return sample_id_map
72 | return sample_id
73 |
74 |
75 | def overwrite_sample_group_id_from_parts(
76 | parts: List[str], sample_group_id: str, mapper: dict
77 | ) -> str:
78 | for k, val in mapper.items():
79 | if k in parts:
80 | sample_group_id = val
81 | return sample_group_id
82 |
83 |
84 | def extract_sample_metadata_from_filepath(
85 | filepath: Path, sample_name_mapper: Optional[Dict[str, Dict[str, str]]] = None
86 | ) -> SampleMetaData:
87 | """parse the sample_id, position and sgrpID from stem"""
88 | stem = filepath.stem
89 | parts = filepath.parts
90 |
91 | sample_id, position = parse_string_to_sample_id_and_position(stem)
92 |
93 | if sample_name_mapper is not None:
94 | sample_id_mapper = sample_name_mapper.get("sample_id", {})
95 | sample_id = overwrite_sample_id_from_mapper(sample_id, sample_id_mapper)
96 | sample_group_id = extract_sample_group_from_sample_id(sample_id)
97 |
98 | if sample_name_mapper is not None:
99 | sample_grp_mapper = sample_name_mapper.get("sample_group_id", {})
100 | sample_group_id = overwrite_sample_group_id_from_parts(
101 | parts, sample_group_id, sample_grp_mapper
102 | )
103 |
104 | sample = SampleMetaData(
105 | **{"id": sample_id, "group": sample_group_id, "position": position}
106 | )
107 | return sample
108 |
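A short sketch of how a fixture-style filename stem is parsed, following the helpers above:

from raman_fitting.imports.samples.sample_id_helpers import (
    extract_sample_group_from_sample_id,
    parse_string_to_sample_id_and_position,
)

sample_id, position = parse_string_to_sample_id_and_position("testDW38C_pos1")
group = extract_sample_group_from_sample_id(sample_id)
print(sample_id, position, group)  # testDW38C 1 test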
--------------------------------------------------------------------------------
/src/raman_fitting/imports/spectrum/__init__.py:
--------------------------------------------------------------------------------
1 | from .datafile_parsers import read_file_with_tablib
2 |
3 | SPECTRUM_FILETYPE_PARSERS = {
4 | ".txt": {
5 | "method": read_file_with_tablib, # load_spectrum_from_txt,
6 | },
7 | ".xlsx": {
8 | "method": read_file_with_tablib, # pd.read_excel,
9 | },
10 | ".csv": {
11 | "method": read_file_with_tablib, # pd.read_csv,
12 | "kwargs": {},
13 | },
14 | ".json": {
15 | "method": read_file_with_tablib,
16 | },
17 | }
18 |
--------------------------------------------------------------------------------
/src/raman_fitting/imports/spectrum/datafile_parsers.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence
2 | from pathlib import Path
3 |
4 | import numpy as np
5 | from tablib import Dataset
6 |
7 | from loguru import logger
8 |
9 |
10 | def filter_data_for_numeric(data: Dataset):
11 | filtered_data = Dataset()
12 | filtered_data.headers = data.headers
13 |
14 | for row in data:
15 | try:
16 | digits_row = tuple(map(float, row))
17 | except ValueError:
18 | continue
19 | except TypeError:
20 | continue
21 |
22 | if not any(i is None for i in digits_row):
23 | filtered_data.append(digits_row)
24 | return filtered_data
25 |
26 |
27 | def load_dataset_from_file(filepath, **kwargs) -> Dataset:
28 | with open(filepath, "r") as fh:
29 | imported_data = Dataset(**kwargs).load(fh)
30 | return imported_data
31 |
32 |
33 | def check_header_keys(dataset: Dataset, header_keys: Sequence[str]):
34 |     if not set(header_keys).issubset(set(dataset.headers)):
35 | first_row = list(dataset.headers)
36 | dataset.insert(0, first_row)
37 | dataset.headers = header_keys
38 | return dataset
39 |
40 |
41 | def read_file_with_tablib(
42 | filepath: Path, header_keys: Sequence[str], sort_by=None
43 | ) -> Dataset:
44 | data = load_dataset_from_file(filepath)
45 | data = check_header_keys(data, header_keys)
46 | numeric_data = filter_data_for_numeric(data)
47 | sort_by = header_keys[0] if sort_by is None else sort_by
48 | sorted_data = numeric_data.sort(sort_by)
49 | return sorted_data
50 |
51 |
52 | def read_text(filepath, max_bytes=10**6, encoding="utf-8", errors=None):
53 | """additional read text method for raw text data inspection"""
54 | _text = "read_text_method"
55 | filesize = filepath.stat().st_size
56 | if filesize < max_bytes:
57 | try:
58 | _text = filepath.read_text(encoding=encoding, errors=errors)
59 | # _text.splitlines()
60 | except Exception as exc:
61 | # IDEA specify which Exceptions are expected
62 | _text += "\nread_error"
63 | logger.warning(f"file read text error => skipped.\n{exc}")
64 | else:
65 | _text += "\nfile_too_large"
66 | logger.warning(f" file too large ({filesize})=> skipped")
67 |
68 | return _text
69 |
70 |
71 | def use_np_loadtxt(filepath, usecols=(0, 1), **kwargs) -> np.array:
72 | array = np.array([])
73 | try:
74 | array = np.loadtxt(filepath, usecols=usecols, **kwargs)
75 | except IndexError:
76 | logger.debug(f"IndexError called np genfromtxt for {filepath}")
77 | array = np.genfromtxt(filepath, invalid_raise=False)
78 | except ValueError:
79 | logger.debug(f"ValueError called np genfromtxt for {filepath}")
80 | array = np.genfromtxt(filepath, invalid_raise=False)
81 | except Exception as exc:
82 | _msg = f"Can not load data from txt file: {filepath}\n{exc}"
83 | logger.error(_msg)
84 | raise ValueError(_msg) from exc
85 | return array
86 |
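A sketch of the tablib-based reader on a small throwaway CSV file; the file name here is hypothetical, and .txt, .csv and .xlsx files are all routed through the same function via SPECTRUM_FILETYPE_PARSERS:

from pathlib import Path
from raman_fitting.imports.spectrum.datafile_parsers import read_file_with_tablib

tmp = Path("example_spectrum.csv")  # hypothetical two-column file
tmp.write_text("200,15.1\n400,18.3\n300,16.0\n")
dataset = read_file_with_tablib(tmp, header_keys=("ramanshift", "intensity"))
print(dataset.headers)        # ('ramanshift', 'intensity')
print(dataset["ramanshift"])  # sorted ascending, e.g. [200.0, 300.0, 400.0]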
--------------------------------------------------------------------------------
/src/raman_fitting/imports/spectrum/spectra_collection.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 |
5 | from pydantic import BaseModel, model_validator
6 |
7 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames
8 | from raman_fitting.models.spectrum import SpectrumData
9 |
10 |
11 | class SpectraDataCollection(BaseModel):
12 | spectra: List[SpectrumData]
13 | region_name: RegionNames
14 | mean_spectrum: SpectrumData | None = None
15 |
16 | @model_validator(mode="after")
17 | def check_spectra_have_same_label(self) -> "SpectraDataCollection":
18 | """checks member of lists"""
19 | labels = set(i.label for i in self.spectra)
20 | if len(labels) > 1:
21 |             raise ValueError(f"Spectra have different labels {labels}")
22 | return self
23 |
24 | @model_validator(mode="after")
25 | def check_spectra_have_same_region(self) -> "SpectraDataCollection":
26 | """checks member of lists"""
27 | region_names = set(i.region_name for i in self.spectra)
28 | if len(region_names) > 1:
29 |             raise ValueError(f"Spectra have different region_names {region_names}")
30 | return self
31 |
32 | @model_validator(mode="after")
33 | def check_spectra_lengths(self) -> "SpectraDataCollection":
34 | unique_lengths_rs = set(len(i.ramanshift) for i in self.spectra)
35 | unique_lengths_int = set(len(i.intensity) for i in self.spectra)
36 | if len(unique_lengths_rs) > 1:
37 |             raise ValueError(
38 | f"The spectra have different ramanshift lengths where they should be the same.\n\t{unique_lengths_rs}"
39 | )
40 | if len(unique_lengths_int) > 1:
41 |             raise ValueError(
42 | f"The spectra have different intensity lengths where they should be the same.\n\t{unique_lengths_int}"
43 | )
44 |
45 | return self
46 |
47 | @model_validator(mode="after")
48 | def set_mean_spectrum(self) -> "SpectraDataCollection":
49 | # wrap this in a ProcessedSpectraCollection model
50 | mean_int = np.mean(np.vstack([i.intensity for i in self.spectra]), axis=0)
51 | mean_ramanshift = np.mean(
52 | np.vstack([i.ramanshift for i in self.spectra]), axis=0
53 | )
54 | source_files = list(set(i.source for i in self.spectra))
55 | _label = "".join(map(str, set(i.label for i in self.spectra)))
56 | mean_spec = SpectrumData(
57 | ramanshift=mean_ramanshift,
58 | intensity=mean_int,
59 | label=f"clean_{self.region_name}_mean",
60 | region_name=self.region_name,
61 | source=source_files,
62 | )
63 |         self.mean_spectrum = mean_spec
64 |         return self
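A sketch of averaging two cleaned spectra into a mean spectrum, assuming "full" is one of the configured region names:

import numpy as np
from raman_fitting.imports.spectrum.spectra_collection import SpectraDataCollection
from raman_fitting.models.spectrum import SpectrumData

x = np.linspace(200, 3600, 100)
spectra = [
    SpectrumData(ramanshift=x, intensity=np.full(100, 1.0), label="clean_full", region_name="full", source="pos1.txt"),
    SpectrumData(ramanshift=x, intensity=np.full(100, 3.0), label="clean_full", region_name="full", source="pos2.txt"),
]
collection = SpectraDataCollection(spectra=spectra, region_name="full")
print(collection.mean_spectrum.label)          # clean_full_mean
print(collection.mean_spectrum.intensity[:3])  # [2. 2. 2.]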
--------------------------------------------------------------------------------
/src/raman_fitting/imports/spectrum/validators.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | import logging
3 |
4 | import pandas as pd
5 | import numpy as np
6 |
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | @dataclass
12 | class ValidateSpectrumValues:
13 | spectrum_key: str
14 | min: float
15 | max: float
16 | len: int
17 |
18 | def validate_min(self, spectrum_data: pd.DataFrame):
19 | data_min = min(spectrum_data[self.spectrum_key])
20 | return np.isclose(data_min, self.min, rtol=0.2)
21 |
22 | def validate_max(self, spectrum_data: pd.DataFrame):
23 | data_max = max(spectrum_data[self.spectrum_key])
24 | return data_max <= self.max
25 |
26 | def validate_len(self, spectrum_data: pd.DataFrame):
27 | data_len = len(spectrum_data)
28 | return np.isclose(data_len, self.len, rtol=0.1)
29 |
30 | def validate(self, spectrum_data: pd.DataFrame):
31 | ret = []
32 | for _func in [self.validate_min, self.validate_max, self.validate_len]:
33 | ret.append(_func(spectrum_data))
34 | return all(ret)
35 |
36 |
37 | def validate_spectrum_keys_expected_values(
38 |     spectrum_data: pd.DataFrame, expected_values: ValidateSpectrumValues
39 | ):
40 | if expected_values.spectrum_key not in spectrum_data.columns:
41 | logger.error(
42 | f"The expected value type {expected_values.spectrum_key} is not in the columns {spectrum_data.columns}"
43 | )
44 | if spectrum_data.empty:
45 | logger.error("Spectrum data is empty")
46 | return
47 |
48 | validation = expected_values.validate(spectrum_data)
49 |
50 | if not validation:
51 | logger.warning(
52 | f"The {expected_values.spectrum_key} of this spectrum does not match the expected values {expected_values}"
53 | )
54 |
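A sketch of the validator on a pandas DataFrame, mirroring the ramanshift defaults used in spectrumdata_parser:

import pandas as pd
from raman_fitting.imports.spectrum.validators import ValidateSpectrumValues

validator = ValidateSpectrumValues(spectrum_key="ramanshift", min=-95, max=3650, len=1600)
spectrum_df = pd.DataFrame({"ramanshift": [-90 + i * 2.3 for i in range(1600)]})
print(validator.validate(spectrum_df))  # True: min, max and length fall within the tolerances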
--------------------------------------------------------------------------------
/src/raman_fitting/imports/spectrumdata_parser.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mon Jul 5 21:09:06 2021
3 |
4 | @author: DW
5 | """
6 |
7 | from dataclasses import dataclass, field
8 | import hashlib
9 |
10 | from pathlib import Path
11 | from functools import partial
12 |
13 | from typing import Callable
14 |
15 | from tablib import Dataset
16 |
17 | from .spectrum.validators import ValidateSpectrumValues
18 | from .files.validators import validate_filepath
19 | from .spectrum import SPECTRUM_FILETYPE_PARSERS
20 |
21 | from raman_fitting.models.spectrum import SpectrumData
22 |
23 | from loguru import logger
24 |
25 |
26 | spectrum_data_keys = ("ramanshift", "intensity")
27 |
28 | ramanshift_expected_values = ValidateSpectrumValues(
29 | spectrum_key="ramanshift", min=-95, max=3650, len=1600
30 | )
31 | intensity_expected_values = ValidateSpectrumValues(
32 | spectrum_key="intensity", min=0, max=1e4, len=1600
33 | )
34 |
35 | spectrum_keys_expected_values = {
36 | "ramanshift": ramanshift_expected_values,
37 | "intensity": intensity_expected_values,
38 | }
39 |
40 |
41 | def get_file_parser(filepath: Path) -> Callable[[Path], Dataset]:
42 | "Get callable file parser function."
43 | suffix = filepath.suffix
44 | parser = SPECTRUM_FILETYPE_PARSERS[suffix]["method"]
45 | kwargs = SPECTRUM_FILETYPE_PARSERS[suffix].get("kwargs", {})
46 | return partial(parser, **kwargs)
47 |
48 |
49 | @dataclass
50 | class SpectrumReader:
51 |     """
52 |     Reads a spectrum from a 'raw' data file (Path or str)
53 |     with the spectrum_data_keys "ramanshift" and "intensity".
54 | 
55 |     Validates the parsed values against the expected ranges
56 |     and sets a hash attribute afterwards.
57 |     """
58 |
59 | filepath: Path | str
60 | spectrum_data_keys: tuple = field(default=spectrum_data_keys, repr=False)
61 |
62 | spectrum: SpectrumData = field(default=None)
63 | label: str = "raw"
64 | region_name: str = "full"
65 | spectrum_hash: str = field(default=None, repr=False)
66 | spectrum_length: int = field(default=0, init=False)
67 |
68 | def __post_init__(self):
69 | super().__init__()
70 |
71 | self.filepath = validate_filepath(self.filepath)
72 | self.spectrum_length = 0
73 |
74 | if self.filepath is None:
75 | raise ValueError(f"File is not valid. {self.filepath}")
76 | parser = get_file_parser(self.filepath)
77 | parsed_spectrum = parser(self.filepath, self.spectrum_data_keys)
78 | if parsed_spectrum is None:
79 | return
80 | for spectrum_key in parsed_spectrum.headers:
81 | if spectrum_key not in spectrum_keys_expected_values:
82 | continue
83 | validator = spectrum_keys_expected_values[spectrum_key]
84 | valid = validator.validate(parsed_spectrum)
85 | if not valid:
86 | logger.warning(
87 | f"The values of {spectrum_key} of this spectrum are invalid. {validator}"
88 | )
89 | spec_init = {
90 | "label": self.label,
91 | "region_name": self.region_name,
92 | "source": self.filepath,
93 | }
94 | _parsed_spec_dict = {
95 | k: parsed_spectrum[k] for k in spectrum_keys_expected_values.keys()
96 | }
97 | spec_init.update(_parsed_spec_dict)
98 | self.spectrum = SpectrumData(**spec_init)
99 |
100 | self.spectrum_hash = self.get_hash_text(self.spectrum)
101 | self.spectrum_length = len(self.spectrum)
102 |
103 | @staticmethod
104 | def get_hash_text(data, hash_text_encoding="utf-8"):
105 | text = str(data)
106 | text_hash = hashlib.sha256(text.encode(hash_text_encoding)).hexdigest()
107 | return text_hash
108 |
109 | def __repr__(self):
110 | _txt = f"Spectrum({self.filepath.name}, len={self.spectrum_length})"
111 | return _txt
112 |
113 | def quickplot(self):
114 | """Plot for quickly checking the spectrum"""
115 | try:
116 | self.spectrum.plot(x="ramanshift", y="intensity")
117 | except TypeError:
118 | logger.warning("No numeric data to plot")
119 |
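A usage sketch for SpectrumReader, assuming the packaged example fixture path resolves from the repository root:

from raman_fitting.imports.spectrumdata_parser import SpectrumReader

reader = SpectrumReader("src/raman_fitting/example_fixtures/testDW38C_pos1.txt")
print(reader)                     # Spectrum(testDW38C_pos1.txt, len=...)
print(reader.spectrum_hash[:12])  # first characters of the sha256 digest of the parsed data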
--------------------------------------------------------------------------------
/src/raman_fitting/interfaces/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/interfaces/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/interfaces/argparse_cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 |
6 | from raman_fitting.config.path_settings import RunModes
7 | from loguru import logger
8 | from .utils import get_package_version
9 |
10 |
11 | def main():
12 | """
13 | The command line interface for raman_fitting
14 | """
15 |
16 | parser = argparse.ArgumentParser(
17 | description="Command-line interface for raman_fitting package main."
18 | )
19 |
20 | parser.add_argument(
21 | "-M",
22 | "-m",
23 | "--run-mode",
24 | type=RunModes,
25 | # choices=,
26 | help="running mode of package, for testing",
27 | default="normal",
28 | )
29 |
30 | parser.add_argument(
31 | "-sIDs",
32 | "--sample_ids",
33 | nargs="+",
34 | default=[],
35 | help="Selection of names of SampleIDs from index to run over.",
36 | )
37 |
38 | parser.add_argument(
39 | "-sGrps",
40 | "--sample_groups",
41 | nargs="+",
42 | default=[],
43 | help="Selection of names of sample groups from index to run over.",
44 | )
45 |
46 | parser.add_argument(
47 | "--fit_model_specific_names",
48 | nargs="+",
49 | default=[],
50 | help="Selection of names of the composite LMfit models to use for fitting.",
51 | )
52 |
53 | parser.add_argument(
54 | "--version",
55 | action="version",
56 | version="%(prog)s {}".format(get_package_version()),
57 | help="Prints out the current version of the raman_fitting distribution, via importlib.metadata.version",
58 | )
59 |
60 | # Execute the parse_args() method
61 | args = parser.parse_args()
62 |
63 | # import the raman_fitting package
64 | import raman_fitting as rf
65 |
66 | extra_kwargs = {}
67 | if args.run_mode == RunModes.EXAMPLES:
68 | extra_kwargs.update(
69 | {"fit_model_specific_names": ["2peaks", "3peaks", "4peaks"]}
70 | )
71 | logger.info(f"Starting raman_fitting with CLI args:\n{args}")
72 | kwargs = {**vars(args), **extra_kwargs}
73 | _main_run = rf.MainDelegator(**kwargs)
74 |
--------------------------------------------------------------------------------
/src/raman_fitting/interfaces/typer_cli.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from typing_extensions import Annotated
3 |
4 | from pathlib import Path
5 | from enum import StrEnum, auto
6 | from loguru import logger
7 | from raman_fitting.config.path_settings import RunModes
8 | from raman_fitting.delegating.main_delegator import MainDelegator
9 | from raman_fitting.imports.files.file_indexer import initialize_index_from_source_files
10 | from .utils import get_package_version
11 |
12 | import typer
13 |
14 |
15 | class MakeTypes(StrEnum):
16 | INDEX = auto()
17 | CONFIG = auto()
18 | EXAMPLE = auto()
19 |
20 |
21 | __version__ = "0.1.0"
22 |
23 |
24 | def version_callback(value: bool):
25 | if value:
26 | package_version = get_package_version()
27 | typer_cli_version = f"Awesome Typer CLI Version: {__version__}"
28 | print(f"{package_version}\n{typer_cli_version}")
29 | raise typer.Exit()
30 |
31 |
32 | app = typer.Typer()
33 | state = {"verbose": False}
34 |
35 |
36 | @app.command()
37 | def run(
38 | models: Annotated[
39 | List[str],
40 | typer.Option(
41 | default_factory=list, help="Selection of models to use for deconvolution."
42 | ),
43 | ],
44 | sample_ids: Annotated[
45 | List[str],
46 | typer.Option(
47 | default_factory=list,
48 | help="Selection of names of SampleIDs from index to run over.",
49 | ),
50 | ],
51 | group_ids: Annotated[
52 | List[str],
53 | typer.Option(
54 | default_factory=list,
55 | help="Selection of names of sample groups from index to run over.",
56 | ),
57 | ],
58 | fit_models: Annotated[
59 | List[str],
60 | typer.Option(
61 | default_factory=list,
62 | help="Selection of names of the composite LMfit models to use for fitting.",
63 | ),
64 | ],
65 | run_mode: Annotated[RunModes, typer.Argument()] = RunModes.NORMAL,
66 | multiprocessing: Annotated[bool, typer.Option("--multiprocessing")] = False,
67 | ):
68 | if run_mode is None:
69 | print("No make run mode passed")
70 | raise typer.Exit()
71 | kwargs = {"run_mode": run_mode, "use_multiprocessing": multiprocessing}
72 | if run_mode == RunModes.EXAMPLES:
73 | kwargs.update(
74 | {
75 | "fit_model_specific_names": [
76 | "2peaks",
77 | "3peaks",
78 | "4peaks",
79 | "2nd_4peaks",
80 | ],
81 | "sample_groups": ["test"],
82 | }
83 | )
84 | logger.info(f"Starting raman_fitting with CLI args:\n{run_mode}")
85 | _main_run = MainDelegator(**kwargs)
86 |
87 |
88 | @app.command()
89 | def make(
90 | make_type: Annotated[MakeTypes, typer.Argument()],
91 | source_files: Annotated[List[Path], typer.Option()],
92 | index_file: Annotated[Path, typer.Option()] = None,
93 | force_reindex: Annotated[bool, typer.Option("--force-reindex")] = False,
94 | ):
95 | if make_type is None:
96 | print("No make type args passed")
97 | raise typer.Exit()
98 | if index_file:
99 | index_file = index_file.resolve()
100 | if make_type == MakeTypes.INDEX:
101 | initialize_index_from_source_files(
102 | files=source_files, index_file=index_file, force_reindex=force_reindex
103 | )
104 |
105 | elif make_type == MakeTypes.CONFIG:
106 | pass # make config
107 |
108 |
109 | @app.callback()
110 | def main(
111 | verbose: bool = False,
112 | version: Annotated[
113 | Optional[bool], typer.Option("--version", callback=version_callback)
114 | ] = None,
115 | ):
116 | """
117 | Manage raman_fitting in the awesome CLI app.
118 | """
119 | if verbose:
120 | print("Will write verbose output")
121 | state["verbose"] = True
122 |
123 |
124 | if __name__ == "__main__":
125 | app()
126 |
--------------------------------------------------------------------------------
/src/raman_fitting/interfaces/utils.py:
--------------------------------------------------------------------------------
1 | def get_package_version() -> str:
2 | try:
3 | import importlib.metadata
4 |
5 | _version = importlib.metadata.version("raman_fitting")
6 | except ImportError:
7 | _version = "version.not.found"
8 |
9 | _version_text = f"raman_fitting version: {_version}"
10 | return _version_text
11 |
--------------------------------------------------------------------------------
/src/raman_fitting/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/models/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/models/deconvolution/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/models/deconvolution/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/models/deconvolution/base_model.py:
--------------------------------------------------------------------------------
1 | """The members of the validated collection of BasePeaks are assembled here into fitting Models"""
2 |
3 | import logging
4 | from typing import Optional, Dict
5 | from warnings import warn
6 |
7 | from lmfit.models import Model as LMFitModel
8 | from pydantic import (
9 | BaseModel,
10 | Field,
11 | ConfigDict,
12 | model_validator,
13 | )
14 |
15 |
16 | from raman_fitting.models.deconvolution.base_peak import (
17 | BasePeak,
18 | get_peaks_from_peak_definitions,
19 | )
20 | from raman_fitting.models.deconvolution.lmfit_parameter import (
21 | construct_lmfit_model_from_components,
22 | )
23 | from raman_fitting.models.splitter import RegionNames
24 |
25 | logger = logging.getLogger(__name__)
26 |
27 | SUBSTRATE_PEAK = "Si1_peak"
28 | SEP = "+"
29 | SUFFIX = "_"
30 |
31 |
32 | class BaseLMFitModelWarning(UserWarning):
33 | pass
34 |
35 |
36 | class BaseLMFitModel(BaseModel):
37 | """
38 | This Model class combines the collection of valid peaks from BasePeak into a regression model
39 | of type lmfit.model.CompositeModel
40 | that is compatible with the lmfit Model and fit functions.
41 |     The name, peaks and lmfit_model attributes are kept
42 |     consistent w.r.t. their meaning when they are set.
43 | 
44 |     Parameters
45 |     --------
46 |     name: str ==> name of the composite model
47 |     peaks: str ==> '+'-separated peak names, converted into the lmfit Model object
48 |     region_name: RegionNames ==> the spectrum region to which this model applies
49 | """
50 |
51 | model_config = ConfigDict(arbitrary_types_allowed=True)
52 |
53 | name: str
54 | peaks: str
55 | peak_collection: Dict[str, BasePeak] = Field(
56 | default_factory=get_peaks_from_peak_definitions,
57 | validate_default=True,
58 | repr=False,
59 | )
60 | lmfit_model: LMFitModel = Field(None, init_var=False, repr=False)
61 | region_name: RegionNames
62 |
63 | @property
64 | def has_substrate(self):
65 | if not self.lmfit_model.components:
66 | return False
67 | comps = set(map(lambda x: x.prefix, self.lmfit_model.components))
68 | substrate_comps = set(
69 | [i.lmfit_model.prefix for i in self.substrate_peaks.values()]
70 | )
71 | return substrate_comps.issubset(comps)
72 |
73 | def add_substrate(self):
74 | if self.has_substrate:
75 | warn(
76 | f"{self.__class__.__name__} already has substrate.",
77 | BaseLMFitModelWarning,
78 | )
79 | return
80 |
81 | for name in self.substrate_peaks.keys():
82 | self.peaks += SEP + name
83 | self.check_lmfit_model()
84 |
85 | def remove_substrate(self):
86 | if not self.has_substrate:
87 | warn(
88 | f"{self.__class__.__name__} has no substrate to remove.",
89 | BaseLMFitModelWarning,
90 | )
91 | return
92 | _peaks = self.peaks.split(SEP)
93 | for name in self.substrate_peaks.keys():
94 | _peaks.remove(name)
95 | self.peaks = SEP.join(_peaks)
96 | self.check_lmfit_model()
97 |
98 | @property
99 | def substrate_peaks(self):
100 | return {k: val for k, val in self.peak_collection.items() if val.is_substrate}
101 |
102 | @model_validator(mode="after")
103 | def check_peaks_in_peak_collection(self) -> "BaseLMFitModel":
104 | peak_names_split = self.peaks.split(SEP)
105 | default_peak_names = self.peak_collection.keys()
106 |         valid_peaks = set(peak_names_split).intersection(set(default_peak_names))
107 | assert valid_peaks
108 | new_peak_names = SEP.join([i for i in peak_names_split if i in valid_peaks])
109 | self.peaks = new_peak_names
110 | return self
111 |
112 | @model_validator(mode="after")
113 | def check_lmfit_model(self) -> "BaseLMFitModel":
114 | lmfit_model = construct_lmfit_model(self.peaks, self.peak_collection)
115 | self.lmfit_model = lmfit_model
116 | return self
117 |
118 |
119 | def construct_lmfit_model(
120 | peaks: str, peak_collection: Dict[str, BasePeak]
121 | ) -> LMFitModel:
122 | peak_names = peaks.split(SEP)
123 | base_peaks = [peak_collection[i] for i in peak_names if i in peak_collection]
124 | if not base_peaks:
125 | raise ValueError(f"Could not find matching peaks for {peaks}")
126 | base_peaks_lmfit = [i.lmfit_model for i in base_peaks]
127 | lmfit_model = construct_lmfit_model_from_components(base_peaks_lmfit)
128 | return lmfit_model
129 |
130 |
131 | def get_models_and_peaks_from_definitions(
132 | models_and_peaks_definitions: Optional[Dict] = None,
133 | ) -> Dict[str, Dict[str, BaseLMFitModel]]:
134 | peak_collection = get_peaks_from_peak_definitions(
135 | peak_definitions=models_and_peaks_definitions
136 | )
137 | models_settings = {
138 | k: val.get("models")
139 | for k, val in models_and_peaks_definitions.items()
140 | if "models" in val
141 | }
142 | all_models = {}
143 | for region_name, region_model_settings in models_settings.items():
144 | if region_model_settings is None:
145 | continue
146 | all_models[region_name] = {}
147 | for model_name, model_peaks in region_model_settings.items():
148 | all_models[region_name][model_name] = BaseLMFitModel(
149 | name=model_name,
150 | peaks=model_peaks,
151 | peak_collection=peak_collection,
152 | region_name=region_name,
153 | )
154 | return all_models
155 |
156 |
157 | def main():
158 | models = get_models_and_peaks_from_definitions()
159 | print("Models: ", len(models))
160 |
161 |
162 | if __name__ == "__main__":
163 | main()
164 |
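A sketch of building a small first-order model and toggling the substrate peak, assuming peaks named "G" and "D" plus a Si1 substrate peak are present in the default TOML definitions:

from raman_fitting.models.deconvolution.base_model import BaseLMFitModel

model = BaseLMFitModel(name="2peaks", peaks="G+D", region_name="first_order")
print(model.lmfit_model)    # lmfit CompositeModel built from the G and D peaks
model.add_substrate()       # appends the substrate peak(s) and rebuilds the model
print(model.has_substrate)  # True
model.remove_substrate()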
--------------------------------------------------------------------------------
/src/raman_fitting/models/deconvolution/base_peak.py:
--------------------------------------------------------------------------------
1 | from enum import StrEnum
2 | from typing import List, Optional, Dict
3 |
4 | from pydantic import (
5 | BaseModel,
6 | ConfigDict,
7 | InstanceOf,
8 | Field,
9 | field_validator,
10 | model_validator,
11 | )
12 | from lmfit import Parameters
13 | from lmfit.models import Model
14 |
15 | from raman_fitting.models.deconvolution.lmfit_parameter import (
16 | LMFIT_MODEL_MAPPER,
17 | LMFitParameterHints,
18 | parmeter_to_dict,
19 | )
20 | from raman_fitting.config.default_models import load_config_from_toml_files
21 | from raman_fitting.utils.string_operations import prepare_text_from_param
22 |
23 | ParamHintDict = Dict[str, Dict[str, Optional[float | bool | str]]]
24 |
25 |
26 | class BasePeakWarning(UserWarning): # pragma: no cover
27 | pass
28 |
29 |
30 | PEAK_TYPE_OPTIONS = StrEnum("PEAK_TYPE_OPTIONS", ["Lorentzian", "Gaussian", "Voigt"])
31 |
32 |
33 | def get_lmfit_model_from_peak_type(peak_type: str, prefix: str = "") -> Optional[Model]:
34 | """returns the lmfit model instance according to the chosen peak type and sets the prefix from peak_name"""
35 | model = None
36 |
37 | capitalized = peak_type.capitalize()
38 | try:
39 | lmfit_model_class = LMFIT_MODEL_MAPPER[capitalized]
40 | model = lmfit_model_class(prefix=prefix)
41 |     except KeyError:
42 | raise NotImplementedError(
43 | f'This peak type or model "{peak_type}" has not been implemented.'
44 | )
45 | return model
46 |
47 |
48 | class BasePeak(BaseModel):
49 |     """
50 |     Base class for easier definition of typical intensity peaks found in
51 |     Raman spectra.
52 | 
53 |     The goal of this class is to make it easier to write peak
54 |     definitions (including possible user input).
55 |     It looks for three fields in the definition, which are required
56 |     for the LMFit model creation, namely: peak_name, peak_type
57 |     and the param_hints.
58 | 
59 |     peak_name:
60 |         arbitrary name used as prefix for the peak
61 |     peak_type:
62 |         defines the lineshape of the peak, the following options are implemented:
63 |         "Lorentzian", "Gaussian", "Voigt"
64 |     param_hints:
65 |         initial values for the parameters of the peak, at least
66 |         a value for the center position of the peak should be given.
67 |     is_substrate:
68 |         marks the peak as a substrate peak (e.g. the Si1 substrate peak)
69 |     is_for_normalization:
70 |         marks the peak as the one used for normalization
71 | 
72 |     These fields can come from different sources: keyword arguments,
73 |     the default model definitions (TOML files) or user input. If several
74 |     sources define the same field, the source with the highest priority
75 |     is chosen. Each field is validated on assignment.
76 | 
77 |     This is essentially a wrapper around the lmfit Model definition.
78 |     Several of these peaks combined are used to make the lmfit CompositeModel
79 |     (composed in the fit_models module), which is then used for the fit.
80 | 
81 |     --------
82 |     Example usage
83 |     --------
84 | 
85 |     "Example peak definition with keyword arguments"
86 | 
87 |     New_peak = BasePeak(
88 |         peak_name='R2D2',
89 |         peak_type='Voigt',
90 |         param_hints={'center': {'value': 2435, 'min': 2400, 'max': 2550}},
91 |     )
92 |     New_peak.lmfit_model  # lmfit VoigtModel with prefix 'R2D2_'
93 | 
94 |     "Example peak definition with only a center value"
95 | 
96 |     New_peak = BasePeak(
97 |         peak_name='D1',
98 |         peak_type='Lorentzian',
99 |         param_hints={'center': {'value': 1500}},
100 |     )
101 |     New_peak.lmfit_model  # lmfit LorentzianModel with prefix 'D1_'
102 | 
103 |     """
104 |
105 | model_config = ConfigDict(arbitrary_types_allowed=True, from_attributes=True)
106 |
107 | peak_name: str
108 | param_hints: Optional[Parameters | List[LMFitParameterHints] | ParamHintDict] = None
109 | peak_type: Optional[str] = None
110 | is_substrate: Optional[bool] = False
111 | is_for_normalization: Optional[bool] = False
112 | docstring: Optional[str] = Field(None, repr=False)
113 | lmfit_model: Optional[InstanceOf[Model]] = None
114 |
115 | @field_validator("peak_type")
116 | @classmethod
117 | def check_peak_type(cls, v: Optional[str]) -> Optional[str]:
118 | if v is None:
119 | return v
120 | if isinstance(v, str):
121 | try:
122 | v = PEAK_TYPE_OPTIONS[v].name
123 | return v
124 | except KeyError:
125 | raise KeyError(
126 | f"peak_type is not in {map(lambda x: x.name, PEAK_TYPE_OPTIONS)}, but {v}"
127 | )
128 | elif isinstance(v, PEAK_TYPE_OPTIONS):
129 | v = v.name
130 | return v
131 | else:
132 | raise TypeError(f"peak_type is not a string or enum, but {type(v)}")
133 |
134 | @field_validator("param_hints")
135 | @classmethod
136 | def check_param_hints(
137 | cls, v: Optional[Parameters | List[LMFitParameterHints] | ParamHintDict]
138 | ) -> Optional[Parameters]:
139 | if v is None:
140 | return v
141 | if isinstance(v, Parameters):
142 | return v
143 |
144 | if isinstance(v, dict):
145 | valid_p_hints = [LMFitParameterHints(name=k, **val) for k, val in v.items()]
146 |
147 |         if isinstance(v, list):
148 |             assert all(isinstance(i, LMFitParameterHints) for i in v)
149 |             valid_p_hints = v
150 | pars_hints = [i.parameter for i in valid_p_hints]
151 | params = Parameters()
152 | params.add_many(*pars_hints)
153 | return params
154 |
155 | @model_validator(mode="after")
156 | def check_lmfit_model(self) -> "BasePeak":
157 | if self.lmfit_model is not None:
158 | if isinstance(self.lmfit_model, Model):
159 | return self
160 | else:
161 | raise ValueError(
162 | f"lmfit_model is not a Model instance, but {type(self.lmfit_model)}"
163 | )
164 | peak_type = self.peak_type
165 | if peak_type is None:
166 | raise ValueError("peak_type is None")
167 |
168 | lmfit_model = get_lmfit_model_from_peak_type(
169 | peak_type, prefix=self.peak_name_prefix
170 | )
171 | if lmfit_model is None:
172 | raise ValueError("lmfit_model is None")
173 |
174 | if self.param_hints is not None:
175 | for k, v in self.param_hints.items():
176 | par_dict = parmeter_to_dict(v)
177 | lmfit_model.set_param_hint(k, **par_dict)
178 | self.lmfit_model = lmfit_model
179 | return self
180 |
181 | @property
182 | def peak_name_prefix(self):
183 | if not self.peak_name:
184 | return ""
185 | if self.peak_name.endswith("_"):
186 | return self.peak_name
187 | return self.peak_name + "_"
188 |
189 | def __str__(self):
190 | _repr = f"{self.__class__.__name__}('{self.peak_name}'"
191 | if self.lmfit_model is None:
192 | _repr += ": no Model set"
193 | _repr += f", {self.lmfit_model}"
194 | param_text = make_string_from_param_hints(self.param_hints)
195 | _repr += f"{param_text})"
196 | return _repr
197 |
198 |
199 | def make_string_from_param_hints(param_hints: Parameters) -> str:
200 | param_center = param_hints.get("center", {})
201 | text = prepare_text_from_param(param_center)
202 | return text
203 |
204 |
205 | def get_peaks_from_peak_definitions(
206 | peak_definitions: Optional[Dict] = None,
207 | ) -> Dict[str, BasePeak]:
208 | if peak_definitions is None:
209 | peak_definitions = load_config_from_toml_files()
210 | peak_settings = {
211 | k: val.get("peaks") for k, val in peak_definitions.items() if "peaks" in val
212 | }
213 | peak_models = {}
214 | for peak_type, peak_type_defs in peak_settings.items():
215 | if peak_type_defs is None:
216 | continue
217 | for peak_name, peak_def in peak_type_defs.items():
218 | peak_models[peak_name] = BasePeak(**peak_def)
219 | return peak_models
220 |
--------------------------------------------------------------------------------
/src/raman_fitting/models/deconvolution/init_models.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | import logging
3 | from typing import Dict
4 |
5 | from raman_fitting.config.default_models import load_config_from_toml_files
6 | from raman_fitting.models.deconvolution.base_model import (
7 | get_models_and_peaks_from_definitions,
8 | )
9 | from .base_model import BaseLMFitModel
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | @dataclass
15 | class InitializeModels:
16 | """
17 | This class will initialize and validate the different fitting models.
18 | The models are of type lmfit.model.CompositeModel and stored in a dict with names
19 | for the models as keys.
20 | """
21 |
22 | model_definitions: dict = field(default_factory=dict)
23 | peaks: dict = field(default_factory=dict)
24 | lmfit_models: Dict[str, Dict[str, BaseLMFitModel]] | None = None
25 |
26 | def __post_init__(self):
27 | self.model_definitions = self.model_definitions or {}
28 | self.peaks = self.peaks or {}
29 | self.lmfit_models = self.lmfit_models or {}
30 | if not self.model_definitions:
31 | self.model_definitions = load_config_from_toml_files()
32 | if not self.lmfit_models and self.model_definitions:
33 | self.lmfit_models = get_models_and_peaks_from_definitions(
34 | self.model_definitions
35 | )
36 |
37 | def __repr__(self):
38 | _t = ", ".join(map(str, self.lmfit_models.keys()))
39 | _t += "\n"
40 | _t += "\n".join(map(str, self.lmfit_models.values()))
41 | return _t
42 |
43 |
44 | def main():
45 | from raman_fitting.config.default_models import (
46 | load_config_from_toml_files,
47 | )
48 |
49 | model_definitions = load_config_from_toml_files()
50 | print("model_definitions: ", model_definitions)
51 | models = InitializeModels()
52 | print(models)
53 | # breakpoint()
54 |
55 |
56 | if __name__ == "__main__":
57 | main()
58 |
--------------------------------------------------------------------------------
/src/raman_fitting/models/deconvolution/lmfit_parameter.py:
--------------------------------------------------------------------------------
1 | import math
2 | from enum import StrEnum
3 | from typing import List, Optional, Dict
4 | from warnings import warn
5 |
6 | from lmfit import Parameter
7 | from lmfit.models import GaussianModel, LorentzianModel, Model, VoigtModel
8 |
9 | from pydantic import (
10 | BaseModel,
11 | ConfigDict,
12 | Field,
13 | model_validator,
14 | )
15 |
16 |
17 | param_hint_dict = Dict[str, Dict[str, Optional[float | bool | str]]]
18 |
19 |
20 | class BasePeakWarning(UserWarning): # pragma: no cover
21 | pass
22 |
23 |
24 | PEAK_TYPE_OPTIONS = StrEnum("PEAK_TYPE_OPTIONS", ["Lorentzian", "Gaussian", "Voigt"])
25 |
26 | LMFIT_PARAM_KWARGS = ("value", "vary", "min", "max", "expr")
27 |
28 |
29 | LMFIT_MODEL_MAPPER = {
30 | "Lorentzian": LorentzianModel,
31 | "Gaussian": GaussianModel,
32 | "Voigt": VoigtModel,
33 | }
34 |
35 |
36 | class LMFitParameterHints(BaseModel):
37 | """
38 | https://github.com/lmfit/lmfit-py/blob/master/lmfit/model.py#L566
39 |
40 | The given hint can include optional bounds and constraints
41 | ``(value, vary, min, max, expr)``, which will be used by
42 | `Model.make_params()` when building default parameters.
43 |
44 | While this can be used to set initial values, `Model.make_params` or
45 | the function `create_params` should be preferred for creating
46 | parameters with initial values.
47 |
48 | The intended use here is to control how a Model should create
49 | parameters, such as setting bounds that are required by the mathematics
50 | of the model (for example, that a peak width cannot be negative), or to
51 | define common constrained parameters.
52 |
53 | Parameters
54 | ----------
55 | name : str
56 | Parameter name, can include the models `prefix` or not.
57 | **kwargs : optional
58 | Arbitrary keyword arguments, needs to be a Parameter attribute.
59 | Can be any of the following:
60 |
61 | - value : float, optional
62 | Numerical Parameter value.
63 | - vary : bool, optional
64 | Whether the Parameter is varied during a fit (default is
65 | True).
66 | - min : float, optional
67 | Lower bound for value (default is ``-numpy.inf``, no lower
68 | bound).
69 | - max : float, optional
70 | Upper bound for value (default is ``numpy.inf``, no upper
71 | bound).
72 | - expr : str, optional
73 | Mathematical expression used to constrain the value during
74 | the fit.
75 |
76 | Example
77 | --------
78 | >>> model = GaussianModel()
79 | >>> model.set_param_hint('sigma', min=0)
80 |
81 | """
82 |
83 | model_config = ConfigDict(arbitrary_types_allowed=True, from_attributes=True)
84 |
85 | name: str
86 | value: Optional[float]
87 | vary: Optional[bool] = True
88 | min: Optional[float] = Field(-math.inf, allow_inf_nan=True)
89 | max: Optional[float] = Field(math.inf, allow_inf_nan=True)
90 | expr: Optional[str] = None
91 | parameter: Optional[Parameter] = Field(None, exclude=True)
92 |
93 | @model_validator(mode="after")
94 | def check_min_max(self) -> "LMFitParameterHints":
95 | min_, max_ = self.min, self.max
96 | if min_ is not None and max_ is not None and min_ > max_:
97 | raise ValueError("Min must be less than max")
98 | return self
99 |
100 | @model_validator(mode="after")
101 | def check_value_min_max(self) -> "LMFitParameterHints":
102 | value, min_, max_ = self.value, self.min, self.max
103 | if value is None:
104 | raise ValueError("Value must not be None")
105 | if min_ is not None:
106 | assert value >= min_
107 | if max_ is not None:
108 | assert value <= max_
109 | if max_ and min_:
110 | assert min_ <= value <= max_
111 | assert min_ < max_
112 | return self
113 |
114 | @model_validator(mode="after")
115 | def check_construct_parameter(self) -> "LMFitParameterHints":
116 | if self.parameter is None:
117 | self.parameter = Parameter(
118 | name=self.name,
119 | value=self.value,
120 | vary=self.vary,
121 | min=self.min,
122 | max=self.max,
123 | expr=self.expr,
124 | )
125 | return self
126 |
127 |
128 | def construct_lmfit_model_from_components(
129 | models: List[Model], sort_on_center=True
130 | ) -> "Model":
131 | """
132 | Construct the lmfit model from a collection of (known) peaks
133 | """
134 | if not models:
135 | raise ValueError("No peaks given to construct lmfit model from.")
136 | if sort_on_center:
137 | models = sort_lmfit_models(models)
138 | lmfit_composite_model = sum(models, models.pop())
139 | return lmfit_composite_model
140 |
141 |
142 | def sort_lmfit_models(
143 | models: List[Model], key: str = "center", reverse: bool = False
144 | ) -> List[Model]:
145 | try:
146 | sorted_models = sorted(
147 | models, key=lambda x: x.param_hints[key]["value"], reverse=reverse
148 | )
149 | except KeyError:
150 |         warn(f"Sorting the models on key {key} failed")
151 |         sorted_models = models
152 |     return sorted_models
153 |
154 | def parmeter_to_dict(parameter: Parameter) -> dict:
155 | ret = {k: getattr(parameter, k) for k in LMFIT_PARAM_KWARGS}
156 | ret = {k: v for k, v in ret.items() if v is not None}
157 | return ret
158 |
159 |
160 | DEFAULT_GAMMA_PARAM_HINT = LMFitParameterHints(
161 | name="gamma", value=1, min=1e-05, max=70, vary=False
162 | )
163 |
164 |
165 | def main():
166 | pass
167 | # breakpoint()
168 |
169 |
170 | if __name__ == "__main__":
171 | main()
172 |
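A sketch of a single parameter hint and its conversion back to a plain dict:

from raman_fitting.models.deconvolution.lmfit_parameter import (
    LMFitParameterHints,
    parmeter_to_dict,
)

hint = LMFitParameterHints(name="center", value=1585, min=1500, max=1650)
print(hint.parameter)                    # lmfit Parameter built by check_construct_parameter
print(parmeter_to_dict(hint.parameter))  # e.g. {'value': 1585, 'vary': True, 'min': 1500, 'max': 1650}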
--------------------------------------------------------------------------------
/src/raman_fitting/models/deconvolution/spectrum_regions.py:
--------------------------------------------------------------------------------
1 | from enum import StrEnum
2 | from typing import Dict
3 |
4 | from pydantic import BaseModel
5 | from raman_fitting.config.default_models import load_config_from_toml_files
6 |
7 |
8 | def get_default_regions_from_toml_files() -> Dict[str, Dict[str, float]]:
9 | default_regions = (
10 | load_config_from_toml_files().get("spectrum", {}).get("regions", {})
11 | )
12 | return default_regions
13 |
14 |
15 | RegionNames = StrEnum(
16 | "RegionNames", " ".join(get_default_regions_from_toml_files()), module=__name__
17 | )
18 |
19 |
20 | class SpectrumRegionLimits(BaseModel):
21 | name: RegionNames
22 | min: int
23 | max: int
24 | extra_margin: int = 20
25 |
--------------------------------------------------------------------------------
/src/raman_fitting/models/fit_models.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | import time
3 |
4 | from pydantic import BaseModel, model_validator, Field, ConfigDict
5 | from lmfit import Model as LMFitModel
6 | from lmfit.model import ModelResult
7 |
8 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel
9 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames
10 | from raman_fitting.models.post_deconvolution.calculate_params import (
11 | calculate_ratio_of_unique_vars_in_results,
12 | )
13 |
14 | from raman_fitting.models.spectrum import SpectrumData
15 |
16 |
17 | class SpectrumFitModel(BaseModel):
18 | model_config = ConfigDict(arbitrary_types_allowed=True)
19 |
20 | spectrum: SpectrumData
21 | model: BaseLMFitModel
22 | region: RegionNames
23 | fit_kwargs: Dict = Field(default_factory=dict, repr=False)
24 | fit_result: ModelResult = Field(None, init_var=False)
25 | param_results: Dict = Field(default_factory=dict)
26 | elapsed_time: float = Field(0, init_var=False, repr=False)
27 |
28 | @model_validator(mode="after")
29 | def match_region_names(self) -> "SpectrumFitModel":
30 | model_region = self.model.region_name
31 | spec_region = self.spectrum.region_name
32 | if model_region != spec_region:
33 | raise ValueError(
34 | f"Region names do not match {model_region} and {spec_region}"
35 | )
36 | return self
37 |
38 | def run_fit(self) -> None:
39 | if "method" not in self.fit_kwargs:
40 | self.fit_kwargs["method"] = "leastsq"
41 | lmfit_model = self.model.lmfit_model
42 | start_time = time.time()
43 | fit_result = call_fit_on_model(lmfit_model, self.spectrum, **self.fit_kwargs)
44 | end_time = time.time()
45 |         elapsed_seconds = end_time - start_time
46 | self.elapsed_time = elapsed_seconds
47 | self.fit_result = fit_result
48 | self.post_process()
49 |
50 | def post_process(self):
51 | if not self.fit_result:
52 | return
53 | param_results = self.fit_result.params.valuesdict()
54 | params_ratio_vars = calculate_ratio_of_unique_vars_in_results(
55 | param_results, raise_exception=False
56 | )
57 | self.param_results["ratios"] = params_ratio_vars
58 |
59 |
60 | def call_fit_on_model(
61 | model: LMFitModel, spectrum: SpectrumData, method="leastsq", **kwargs
62 | ) -> ModelResult:
63 | # ideas: improve fitting loop so that starting parameters from modelX and modelX+Si are shared, faster...
64 | init_params = model.make_params()
65 | x, y = spectrum.ramanshift, spectrum.intensity
66 | out = model.fit(y, init_params, x=x, method=method, **kwargs) # 'leastsq'
67 | return out
68 |
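A sketch of running a fit; first_order_spec and fit_model are assumed objects, namely a SpectrumData for the "first_order" region and a matching BaseLMFitModel (e.g. the one from the base_model sketch above):

from raman_fitting.models.fit_models import SpectrumFitModel

spectrum_fit = SpectrumFitModel(
    spectrum=first_order_spec,  # assumed SpectrumData with region_name "first_order"
    model=fit_model,            # assumed BaseLMFitModel for the same region
    region="first_order",
)
spectrum_fit.run_fit()
print(spectrum_fit.fit_result.best_values)   # fitted peak parameters
print(spectrum_fit.param_results["ratios"])  # D/G-style ratios per parameter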
--------------------------------------------------------------------------------
/src/raman_fitting/models/post_deconvolution/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/models/post_deconvolution/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/models/post_deconvolution/calculate_params.py:
--------------------------------------------------------------------------------
1 | from inspect import getmembers, isfunction
2 | from typing import Dict, Any
3 |
4 | from raman_fitting.models.post_deconvolution import parameter_ratio_funcs
5 |
6 | RATIO_FUNC_PREFIX = "ratio_"
7 | functions = [
8 | fn
9 | for _, fn in getmembers(parameter_ratio_funcs, isfunction)
10 | if fn.__module__ == parameter_ratio_funcs.__name__
11 | ]
12 | ratio_funcs = list(
13 | filter(lambda x: x.__name__.startswith(RATIO_FUNC_PREFIX), functions)
14 | )
15 |
16 |
17 | def calculate_params_from_results(
18 | combined_results: Dict,
19 | var_name: str,
20 | prefix: str | None = None,
21 | raise_exception=True,
22 | ) -> dict[str, dict[str, Any]]:
23 | results = {}
24 | for ratio_func in ratio_funcs:
25 | try:
26 | label, ratio = ratio_func(combined_results, var_name, prefix=prefix)
27 | func = ratio_func.__name__
28 | results[func] = {"label": label, "ratio": ratio}
29 | except (ValueError, KeyError) as e:
30 | if raise_exception:
31 | raise e from e
32 | continue
33 | return results
34 |
35 |
36 | def calculate_ratio_of_unique_vars_in_results(
37 | results: Dict, raise_exception: bool = True
38 | ) -> dict[Any, dict[str, dict[str, Any]]]:
39 | uniq_vars = set(i.split("_")[-1] for i in results.keys())
40 | var_ratios = {}
41 | for var_name in uniq_vars:
42 | ratios = calculate_params_from_results(
43 | results, var_name, raise_exception=raise_exception
44 | )
45 | var_ratios[var_name] = ratios
46 | return var_ratios
47 |
48 |
49 | def main():
50 | print(functions)
51 | print(list(map(str, ratio_funcs)))
52 |
53 |
54 | if __name__ == "__main__":
55 | main()
56 |
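A sketch with a flat dict of fit results keyed as "<PEAK>_<var>", as produced by ModelResult.params.valuesdict(); ratio functions whose required peaks are missing are skipped when raise_exception=False:

from raman_fitting.models.post_deconvolution.calculate_params import (
    calculate_ratio_of_unique_vars_in_results,
)

results = {"D_amplitude": 1.2, "G_amplitude": 2.4, "D2_amplitude": 0.3}
ratios = calculate_ratio_of_unique_vars_in_results(results, raise_exception=False)
print(ratios["amplitude"]["ratio_d_to_g"])  # e.g. {'label': 'D/G', 'ratio': 0.5}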
--------------------------------------------------------------------------------
/src/raman_fitting/models/post_deconvolution/parameter_ratio_funcs.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, List, Dict
2 | from functools import wraps
3 |
4 | from raman_fitting.utils.decorators import decorator_with_kwargs
5 | from raman_fitting.utils.string_operations import join_prefix_suffix
6 |
7 |
8 | def validate_result(result, var_name: str, requires: List[str] | None = None):
9 |     req_vars = {join_prefix_suffix(i, var_name) for i in requires or []}
10 | provided_vars = {join_prefix_suffix(i, var_name) for i in result.keys()}
11 | if provided_vars < req_vars:
12 | raise ValueError(
13 | f"Missing required vars {req_vars} in result: {', '.join(result.keys())}"
14 | )
15 |
16 |
17 | @decorator_with_kwargs
18 | def calculate_ratio(function, requires: List[str] | None = None):
19 | @wraps(function)
20 | def wrapper(result, var_name: str, prefix: str | None = None, **kwargs):
21 | validate_result(result, var_name, requires=requires)
22 | prefix = prefix or ""
23 | return function(result, var_name, prefix=prefix)
24 |
25 | return wrapper
26 |
27 |
28 | def get_var(peak: str, result: Dict, var_name: str):
29 | return result[join_prefix_suffix(peak.upper(), var_name)]
30 |
31 |
32 | @calculate_ratio(requires=["D", "G"])
33 | def ratio_d_to_g(result, var_name: str, prefix: str | None = None) -> Tuple[str, float]:
34 | d_ = get_var("D", result, var_name)
35 | g_ = get_var("G", result, var_name)
36 | ratio = d_ / g_
37 | label = f"{prefix}D/{prefix}G"
38 | return label, ratio
39 |
40 |
41 | @calculate_ratio(requires=["D", "G"])
42 | def ratio_la_d_to_g(
43 | result, var_name: str, prefix: str | None = None
44 | ) -> Tuple[str, float]:
45 | ratio = 4.4 * (ratio_d_to_g(result, var_name, prefix=prefix)[-1]) ** -1
46 | label = f"La_{prefix}G"
47 | return label, ratio
48 |
49 |
50 | @calculate_ratio(requires=["D", "G", "D2"])
51 | def ratio_d_to_gplusd2(
52 | result, var_name: str, prefix: str | None = None
53 | ) -> Tuple[str, float] | None:
54 | d = get_var("D", result, var_name)
55 | g = get_var("G", result, var_name)
56 | d2 = get_var("D2", result, var_name)
57 | ratio = d / (g + d2)
58 | label = f"{prefix}D/({prefix}G+{prefix}D2)"
59 | return label, ratio
60 |
61 |
62 | @calculate_ratio(requires=["D", "G", "D2"])
63 | def ratio_la_d_to_gplusd2(
64 | result, var_name: str, prefix: str | None = None
65 | ) -> Tuple[str, float]:
66 | ratio = 4.4 * (ratio_d_to_gplusd2(result, var_name, prefix=prefix)[-1]) ** -1
67 |     label = f"La_{prefix}G+D2"
68 | return label, ratio
69 |
70 |
71 | @calculate_ratio(requires=["D2", "G", "D3"])
72 | def ratio_d3_to_gplusd2(
73 | result, var_name: str, prefix: str | None = None
74 | ) -> Tuple[str, float] | None:
75 | d2 = get_var("D2", result, var_name)
76 | d3 = get_var("D3", result, var_name)
77 | g = get_var("G", result, var_name)
78 | ratio = d3 / (g + d2)
79 |     label = f"{prefix}D3/({prefix}G+{prefix}D2)"
80 | return label, ratio
81 |
82 |
83 | @calculate_ratio(requires=["D3", "G"])
84 | def ratio_d3_to_g(
85 | result, var_name: str, prefix: str | None = None
86 | ) -> Tuple[str, float] | None:
87 | d3 = get_var("D3", result, var_name)
88 | g = get_var("G", result, var_name)
89 | ratio = d3 / g
90 | label = f"{prefix}D3/{prefix}G"
91 | return label, ratio
92 |
93 |
94 | @calculate_ratio(requires=["D4", "G"])
95 | def ratio_d4_to_g(
96 | result, var_name: str, prefix: str | None = None
97 | ) -> Tuple[str, float] | None:
98 | d4 = get_var("D4", result, var_name)
99 | g = get_var("G", result, var_name)
100 | ratio = d4 / g
101 | label = f"{prefix}D4/{prefix}G"
102 | return label, ratio
103 |
104 |
105 | @calculate_ratio(requires=["D1D1", "D"])
106 | def ratio_d1d1_to_d(result, var_name: str, prefix: str | None = None):
107 | d1d1 = get_var("D1D1", result, var_name)
108 | d = get_var("D", result, var_name)
109 | ratio = 8.8 * d1d1 / d
110 | label = f"Leq_{prefix}"
111 | return label, ratio
112 |
113 |
114 | @calculate_ratio(requires=["D1D1", "GD1"])
115 | def ratio_d1d1_to_gd1(
116 | result, var_name: str, prefix: str | None = None
117 | ) -> Tuple[str, float]:
118 | d1d1 = get_var("D1D1", result, var_name)
119 | gd1 = get_var("GD1", result, var_name)
120 | ratio = d1d1 / gd1
121 | label = f"{prefix}D1D1/{prefix}GD1"
122 |
123 | return label, ratio
124 |
125 |
126 | if __name__ == "__main__":
127 | result = {"D_peak": 1, "G_peak": 2, "D1D1_peak": 3}
128 | var_name = "peak"
129 | print(ratio_d_to_g(result, var_name))
130 |
--------------------------------------------------------------------------------
/src/raman_fitting/models/spectrum.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence
2 | import numpy as np
3 |
4 | from pydantic import (
5 | BaseModel,
6 | FilePath,
7 | AwareDatetime,
8 | model_validator,
9 | Field,
10 | )
11 | import pydantic_numpy.typing as pnd
12 |
13 |
14 | class SpectrumData(BaseModel):
15 | ramanshift: pnd.Np1DArrayFp32 = Field(repr=False)
16 | intensity: pnd.Np1DArrayFp32 = Field(repr=False)
17 | label: str
18 | region_name: str | None = None
19 | source: FilePath | Sequence[FilePath] | str | Sequence[str] | None = None
20 |
21 | @model_validator(mode="after")
22 | def validate_equal_length(self):
23 | if len(self.ramanshift) != len(self.intensity):
24 | raise ValueError("Spectrum arrays are not of equal length.")
25 | return self
26 |
27 | @model_validator(mode="after")
28 | def check_if_contains_nan(self):
29 | if np.isnan(self.ramanshift).any():
30 | raise ValueError("Ramanshift contains NaN")
31 |
32 | if np.isnan(self.intensity).any():
33 | raise ValueError("Intensity contains NaN")
34 | return self
35 |
36 | # length is derived property
37 | def __len__(self):
38 | return len(self.ramanshift)
39 |
40 |
41 | class SpectrumMetaData(BaseModel):
42 | sample_id: str
43 | sample_group: str
44 | sample_position: str
45 | creation_date: AwareDatetime
46 | source_file: FilePath # FileStem is derived
47 |
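A sketch of the basic container; both arrays must be of equal length and NaN-free, and "full" is assumed to be a configured region name:

import numpy as np
from raman_fitting.models.spectrum import SpectrumData

spec = SpectrumData(
    ramanshift=np.linspace(0, 3600, 1800),
    intensity=np.random.rand(1800),
    label="raw",
    region_name="full",
)
print(len(spec))  # 1800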
--------------------------------------------------------------------------------
/src/raman_fitting/models/splitter.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any
2 | import numpy as np
3 |
4 | from pydantic import BaseModel, model_validator, Field
5 | from .spectrum import SpectrumData
6 | from .deconvolution.spectrum_regions import (
7 | SpectrumRegionLimits,
8 | RegionNames,
9 | get_default_regions_from_toml_files,
10 | )
11 |
12 |
13 | class SplitSpectrum(BaseModel):
14 | spectrum: SpectrumData
15 | region_limits: Dict[str, SpectrumRegionLimits] = Field(None, init_var=None)
16 | spec_regions: Dict[str, SpectrumData] = Field(None, init_var=None)
17 | info: Dict[str, Any] = Field(default_factory=dict)
18 |
19 | @model_validator(mode="after")
20 | def process_spectrum(self) -> "SplitSpectrum":
21 | if self.region_limits is None:
22 | region_limits = get_default_spectrum_region_limits()
23 | self.region_limits = region_limits
24 |
25 | if self.spec_regions is not None:
26 | return self
27 | spec_regions = split_spectrum_data_in_regions(
28 | self.spectrum.ramanshift,
29 | self.spectrum.intensity,
30 | spec_region_limits=self.region_limits,
31 | label=self.spectrum.label,
32 | source=self.spectrum.source,
33 | )
34 | self.spec_regions = spec_regions
35 | return self
36 |
37 | def get_region(self, region_name: RegionNames):
38 | region_name = RegionNames(region_name)
39 | spec_region_keys = [
40 | i for i in self.spec_regions.keys() if region_name.name in i
41 | ]
42 | if len(spec_region_keys) != 1:
43 | raise ValueError(f"Key {region_name} not in {spec_region_keys}")
44 | spec_region_key = spec_region_keys[0]
45 | return self.spec_regions[spec_region_key]
46 |
47 |
48 | def get_default_spectrum_region_limits(
49 | regions_mapping: Dict = None,
50 | ) -> Dict[str, SpectrumRegionLimits]:
51 | if regions_mapping is None:
52 | regions_mapping = get_default_regions_from_toml_files()
53 | regions = {}
54 | for region_name, region_config in regions_mapping.items():
55 | regions[region_name] = SpectrumRegionLimits(name=region_name, **region_config)
56 | return regions
57 |
58 |
59 | def split_spectrum_data_in_regions(
60 |     ramanshift: np.ndarray,
61 |     intensity: np.ndarray,
62 | spec_region_limits=None,
63 | label=None,
64 | source=None,
65 | ) -> Dict[str, SpectrumData]:
66 |     """
67 |     Split a spectrum into the regions defined by SpectrumRegionLimits.
68 |     The region names are taken from the SpectrumRegionLimits and used
69 |     as keys of the returned dict of SpectrumData, one entry per region.
70 |     """
71 |
72 | if spec_region_limits is None:
73 | spec_region_limits = get_default_spectrum_region_limits()
74 | spec_regions = {}
75 | for region_name, region in spec_region_limits.items():
76 | # find indices of region in ramanshift array
77 | ind = (ramanshift >= np.min(region.min)) & (ramanshift <= np.max(region.max))
78 | region_lbl = f"region_{region_name}"
79 | if label is not None and label not in region_lbl:
80 | region_lbl = f"{label}_{region_lbl}"
81 | _data = {
82 | "ramanshift": ramanshift[ind],
83 | "intensity": intensity[ind],
84 | "label": region_lbl,
85 | "region_name": region_name,
86 | "source": source,
87 | }
88 | spec_regions[region_lbl] = SpectrumData(**_data)
89 | return spec_regions
90 |
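
A rough sketch of how SplitSpectrum drives split_spectrum_data_in_regions; the synthetic spectrum is made up, and the printed region keys depend on the bundled spectrum_regions.toml defaults, so they are indicative only.

```py
import numpy as np
from raman_fitting.models.spectrum import SpectrumData
from raman_fitting.models.splitter import SplitSpectrum

spectrum = SpectrumData(
    ramanshift=np.linspace(200.0, 3600.0, 1700),
    intensity=np.random.default_rng(0).random(1700),
    label="raw",
    region_name="full",
)
split = SplitSpectrum(spectrum=spectrum)
print(list(split.spec_regions))  # e.g. ["raw_region_first_order", ...]

first_order = split.get_region("first_order")
print(first_order.label, len(first_order))
```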
--------------------------------------------------------------------------------
/src/raman_fitting/processing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/processing/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/processing/baseline_subtraction.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import numpy as np
4 | from scipy.stats import linregress
5 |
6 | from ..models.splitter import SplitSpectrum
7 | from ..models.spectrum import SpectrumData
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | def subtract_baseline_per_region(spec: SpectrumData, split_spectrum: SplitSpectrum):
13 | ramanshift = spec.ramanshift
14 | intensity = spec.intensity
15 | region_name = spec.region_name
16 | label = spec.label
17 | regions_data = split_spectrum.spec_regions
18 | region_limits = split_spectrum.region_limits
19 | selected_intensity = intensity
20 | region_config = region_limits[region_name]
21 | region_name_first_order = list(
22 | filter(lambda x: "first_order" in x, regions_data.keys())
23 | )
24 | if (
25 | any((i in region_name or i in label) for i in ("full", "norm"))
26 | and region_name_first_order
27 | ):
28 | selected_intensity = regions_data[region_name_first_order[0]].intensity
29 | region_config = region_limits["first_order"]
30 |
31 | bl_linear = linregress(
32 | ramanshift[[0, -1]],
33 | [
34 | np.mean(selected_intensity[0 : region_config.extra_margin]),
35 | np.mean(selected_intensity[-region_config.extra_margin : :]),
36 | ],
37 | )
38 | i_blcor = intensity - (bl_linear[0] * ramanshift + bl_linear[1])
39 | return i_blcor, bl_linear
40 |
41 |
42 | def subtract_baseline_from_split_spectrum(
43 | split_spectrum: SplitSpectrum = None, label=None
44 | ) -> SplitSpectrum:
45 | _bl_spec_regions = {}
46 | _info = {}
47 | label = "blcorr" if label is None else label
48 | for region_name, spec in split_spectrum.spec_regions.items():
49 | blcorr_int, blcorr_lin = subtract_baseline_per_region(spec, split_spectrum)
50 | new_label = f"{label}_{spec.label}" if label not in spec.label else spec.label
51 | spec = SpectrumData(
52 | **{
53 | "ramanshift": spec.ramanshift,
54 | "intensity": blcorr_int,
55 | "label": new_label,
56 | "region_name": region_name,
57 | "source": spec.source,
58 | }
59 | )
60 | _bl_spec_regions.update(**{region_name: spec})
61 | _info.update(**{region_name: blcorr_lin})
62 | bl_corrected_spectra = split_spectrum.model_copy(
63 | update={"spec_regions": _bl_spec_regions, "info": _info}
64 | )
65 | return bl_corrected_spectra
66 |
--------------------------------------------------------------------------------
/src/raman_fitting/processing/despike.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mon May 3 11:10:59 2021
3 |
4 | @author: dw
5 | """
6 |
7 | from typing import Dict, Tuple, Any, Optional
8 | import copy
9 | import logging
10 |
11 | import numpy as np
12 |
13 | from pydantic import BaseModel, Field, model_validator
14 |
15 | from raman_fitting.models.spectrum import SpectrumData
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 | class SpectrumDespiker(BaseModel):
21 | spectrum: Optional[SpectrumData] = None
22 | threshold_z_value: int = 4
23 | moving_region_size: int = 1
24 | ignore_lims: Tuple[int, int] = (20, 46)
25 | info: Dict = Field(default_factory=dict)
26 | processed_spectrum: SpectrumData = Field(None)
27 |
28 | @model_validator(mode="after")
29 | def process_spectrum(self) -> "SpectrumDespiker":
30 | if self.spectrum is None:
31 | raise ValueError("SpectrumDespiker, spectrum is None")
32 | despiked_intensity, result_info = self.call_despike_spectrum(
33 | self.spectrum.intensity
34 | )
35 | despiked_spec = self.spectrum.model_copy(
36 | update={"intensity": despiked_intensity}, deep=True
37 | )
38 | SpectrumData.model_validate(despiked_spec, from_attributes=True)
39 | self.processed_spectrum = despiked_spec
40 | self.info.update(**result_info)
41 | return self
42 |
43 | def process_intensity(self, intensity: np.ndarray) -> np.ndarray:
44 | despiked_intensity, _ = self.call_despike_spectrum(intensity)
45 | return despiked_intensity
46 |
47 | def call_despike_spectrum(self, intensity: np.ndarray) -> Tuple[np.ndarray, Dict]:
48 | despiked_intensity, result_info = despike_spectrum(
49 | intensity,
50 | self.threshold_z_value,
51 | self.moving_region_size,
52 | ignore_lims=self.ignore_lims,
53 | )
54 | return despiked_intensity, result_info
55 |
56 |
57 | def despike_spectrum(
58 | intensity: np.ndarray,
59 | threshold_z_value: int,
60 | moving_region_size: int,
61 | ignore_lims=(20, 46),
62 | ) -> Tuple[np.ndarray, Dict[str, Any]]:
63 |     """
64 |     A despiking algorithm from the reference literature:
65 |     https://doi.org/10.1016/j.chemolab.2018.06.009
66 | 
67 |     Parameters
68 |     ----------
69 |     intensity : np.ndarray
70 |         The intensity array from which the despiked intensity is calculated.
71 |     threshold_z_value : int
72 |         Z-score threshold above which a point is treated as a spike.
73 |     moving_region_size : int
74 |         Half-width of the window used to average over detected spikes.
75 |     ignore_lims : tuple of int, optional
76 |         Index range within which detected spikes are left untouched.
77 |     Returns
78 |     -------
79 |     despiked_intensity : np.ndarray
80 |         The despiked intensity, of the same length as the input intensity.
81 |     Notes
82 |     -----
83 |     Let Y1, ..., Yn be a single Raman spectrum recorded at equally spaced wavenumbers.
84 |     The detrended differenced series dYt removes linear and slow-moving curved trends
85 |     but preserves sharp, localised spikes; scaling by the median absolute deviation gives the z-values.
86 |     D.A. Whitaker, K. Hayes. Chemometrics and Intelligent Laboratory Systems 179 (2018) 82-84
87 |     """
88 |
89 | z_intensity = calc_z_value_intensity(intensity)
90 | filtered_z_intensity = filter_z_intensity_values(z_intensity, threshold_z_value)
91 | i_despiked = despike_filter(
92 | intensity, filtered_z_intensity, moving_region_size, ignore_lims=ignore_lims
93 | )
94 | result = {"z_intensity": z_intensity, "filtered_z_intensity": filtered_z_intensity}
95 | return i_despiked, result
96 |
97 |
98 | def calc_z_value_intensity(intensity: np.ndarray) -> np.ndarray:
99 | diff_intensity = np.append(np.diff(intensity), 0) # dYt
100 | median_diff_intensity = np.median(diff_intensity) # dYt_Median
101 | median_abs_deviation = np.median(abs(diff_intensity - median_diff_intensity))
102 | intensity_values_z = (
103 | 0.6745 * (diff_intensity - median_diff_intensity)
104 | ) / median_abs_deviation
105 | return intensity_values_z
106 |
107 |
108 | def filter_z_intensity_values(z_intensity, z_intensity_threshold):
109 |     filtered_z_intensity = copy.deepcopy(z_intensity)
110 |     filtered_z_intensity[np.abs(z_intensity) > z_intensity_threshold] = np.nan
111 | filtered_z_intensity[0] = filtered_z_intensity[-1] = 0
112 | return filtered_z_intensity
113 |
114 |
115 | def despike_filter(
116 | intensity: np.ndarray,
117 | filtered_z_intensity: np.ndarray,
118 | moving_region_size: int,
119 | ignore_lims=(20, 46),
120 | ):
121 | n = len(intensity)
122 | i_despiked = copy.deepcopy(intensity)
123 | spikes = np.nonzero(np.isnan(filtered_z_intensity))
124 | for i in list(spikes[0]):
125 | if i < ignore_lims[0] or i > ignore_lims[1]:
126 | w = np.arange(
127 | max(0, i - moving_region_size), min(n, i + moving_region_size)
128 | )
129 | w = w[~np.isnan(filtered_z_intensity[w])]
130 | if intensity[w].any():
131 | i_despiked[i] = np.mean(intensity[w])
132 | else:
133 | i_despiked[i] = intensity[i]
134 | return i_despiked
135 |
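
A quick numerical check of the despiking helpers above (not one of the package tests): a noisy baseline with one sharp spike outside the default ignore_lims of (20, 46); the spike's z-value exceeds the threshold and the point is replaced by the mean of its unflagged neighbours.

```py
import numpy as np
from raman_fitting.processing.despike import despike_spectrum

rng = np.random.default_rng(0)
intensity = 100.0 + rng.normal(0.0, 1.0, 80)
intensity[60] += 500.0  # single sharp spike, outside ignore_lims=(20, 46)

despiked, info = despike_spectrum(
    intensity, threshold_z_value=4, moving_region_size=2
)
print(round(intensity[60], 1), round(despiked[60], 1))  # ~600 -> ~100
# the z-value that flagged the spike, far above the threshold of 4
print(round(float(np.max(np.abs(info["z_intensity"]))), 1))
```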
--------------------------------------------------------------------------------
/src/raman_fitting/processing/filter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from dataclasses import dataclass
5 | from typing import Callable, Protocol, Tuple, Dict
6 | import numpy as np
7 | from scipy import signal
8 |
9 | from raman_fitting.models.spectrum import SpectrumData
10 |
11 |
12 | class IntensityProcessor(Protocol):
13 | def process_intensity(self, intensity: np.ndarray) -> np.ndarray: ...
14 |
15 |
16 | @dataclass
17 | class IntensityFilter:
18 | name: str
19 | filter_func: Callable
20 | filter_args: Tuple
21 | filter_kwargs: Dict
22 |
23 | def process_intensity(self, intensity: np.ndarray) -> np.ndarray:
24 | if intensity is None:
25 | raise ValueError("no intensity given to filter")
26 | filtered_intensity = self.filter_func(
27 | intensity, *self.filter_args, **self.filter_kwargs
28 | )
29 | return filtered_intensity
30 |
31 |
32 | available_filters = {
33 | "savgol_filter": IntensityFilter(
34 | "savgol_filter",
35 | signal.savgol_filter,
36 | filter_args=(13, 3),
37 | filter_kwargs=dict(mode="nearest"),
38 | )
39 | }
40 |
41 |
42 | def filter_spectrum(
43 | spectrum: SpectrumData = None, filter_name="savgol_filter"
44 | ) -> SpectrumData:
45 | if filter_name not in available_filters:
46 | raise ValueError(f"Chosen filter {filter_name} not available.")
47 |
48 | filter_class = available_filters[filter_name]
49 | filtered_intensity = filter_class.process_intensity(spectrum.intensity)
50 | label = f"{filter_name}_{spectrum.label}"
51 | filtered_spectrum = spectrum.model_copy(
52 | update={"intensity": filtered_intensity, "label": label}
53 | )
54 | return filtered_spectrum
55 |
56 |
57 | """
58 | Parameters
59 | ----------
60 | ramanshift : array or list
61 | collection of the ramanshift values
62 | intensity : array or list
63 | collection of the intensity values
64 | label : TYPE, optional
65 | DESCRIPTION. The default is "".
66 | **kwargs : TYPE
67 | DESCRIPTION.
68 |
69 | Returns
70 | -------
71 | None.
72 | """
73 |
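
A sketch of extending the filter registry above with an extra smoothing filter; the medfilt entry and the synthetic spectrum are hypothetical, only savgol_filter ships with the module.

```py
import numpy as np
from scipy import signal
from raman_fitting.models.spectrum import SpectrumData
from raman_fitting.processing.filter import (
    IntensityFilter,
    available_filters,
    filter_spectrum,
)

# register a hypothetical median filter next to the default savgol_filter
available_filters["medfilt"] = IntensityFilter(
    "medfilt", signal.medfilt, filter_args=(5,), filter_kwargs={}
)

spectrum = SpectrumData(
    ramanshift=np.linspace(200.0, 3600.0, 1700),
    intensity=np.random.default_rng(1).random(1700),
    label="raw",
)
smoothed = filter_spectrum(spectrum=spectrum, filter_name="medfilt")
print(smoothed.label)  # "medfilt_raw"
```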
--------------------------------------------------------------------------------
/src/raman_fitting/processing/normalization.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import numpy as np
4 |
5 | from ..models.splitter import SplitSpectrum
6 | from ..models.spectrum import SpectrumData
7 | from ..models.fit_models import SpectrumFitModel, LMFitModel
8 |
9 | from loguru import logger
10 |
11 |
12 | def get_simple_normalization_intensity(split_spectrum: SplitSpectrum) -> float:
13 | norm_spec = split_spectrum.get_region("normalization")
14 | normalization_intensity = np.nanmax(norm_spec.intensity)
15 | return normalization_intensity
16 |
17 |
18 | def get_normalization_factor(
19 | split_spectrum: SplitSpectrum,
20 | norm_method="simple",
21 | normalization_model: LMFitModel = None,
22 | ) -> float:
23 | simple_norm = get_simple_normalization_intensity(split_spectrum)
24 | normalization_intensity = simple_norm
25 |
26 | if "fit" in norm_method and normalization_model is not None:
27 | fit_norm = normalizer_fit_model(
28 | split_spectrum, normalization_model=normalization_model
29 | )
30 | if fit_norm is not None:
31 | normalization_intensity = fit_norm
32 | norm_factor = 1 / normalization_intensity
33 |
34 | return norm_factor
35 |
36 |
37 | def normalize_regions_in_split_spectrum(
38 | split_spectrum: SplitSpectrum, norm_factor: float, label: Optional[str] = None
39 | ) -> SplitSpectrum:
40 | norm_spec_regions = {}
41 | norm_infos = {}
42 | label = split_spectrum.spectrum.label if label is None else label
43 | for region_name, spec in split_spectrum.spec_regions.items():
44 | norm_label = f"{region_name}_{label}" if region_name not in label else label
45 | norm_label = f"norm_{norm_label}" if "norm" not in norm_label else norm_label
46 | # label looks like "norm_regionname_label"
47 | _data = SpectrumData(
48 | **{
49 | "ramanshift": spec.ramanshift,
50 | "intensity": spec.intensity * norm_factor,
51 | "label": norm_label,
52 | "region_name": region_name,
53 | "source": spec.source,
54 | }
55 | )
56 | norm_spec_regions.update(**{region_name: _data})
57 | norm_infos.update(**{region_name: {"normalization_factor": norm_factor}})
58 | norm_spectra = split_spectrum.model_copy(
59 | update={"spec_regions": norm_spec_regions, "info": norm_infos}
60 | )
61 | return norm_spectra
62 |
63 |
64 | def normalize_split_spectrum(
65 | split_spectrum: SplitSpectrum = None,
66 | ) -> SplitSpectrum:
67 |     "Normalize the spectrum intensity according to the chosen normalization method."
68 | normalization_factor = get_normalization_factor(split_spectrum)
69 | norm_data = normalize_regions_in_split_spectrum(
70 | split_spectrum, normalization_factor
71 | )
72 | return norm_data
73 |
74 |
75 | def normalizer_fit_model(
76 |     spectrum: SpectrumData, normalization_model: LMFitModel
77 | ) -> float | None:
78 |     spec_fit = SpectrumFitModel(spectrum=spectrum, model=normalization_model)
79 | spec_fit.run_fit()
80 | if not spec_fit.fit_result:
81 | return
82 | try:
83 | return spec_fit.fit_result.params["G_height"].value
84 | except KeyError as e:
85 | logger.error(e)
86 |
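
The default "simple" method above reduces to the reciprocal of the maximum intensity in the "normalization" region (the fit-based variant instead uses the fitted G_height); a tiny arithmetic illustration with made-up numbers:

```py
import numpy as np

# made-up intensities of the "normalization" region
normalization_region_intensity = np.array([120.0, 950.0, 430.0])
norm_factor = 1 / np.nanmax(normalization_region_intensity)
print(normalization_region_intensity * norm_factor)  # the maximum scales to 1.0
```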
--------------------------------------------------------------------------------
/src/raman_fitting/processing/post_processing.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Protocol
3 |
4 | from raman_fitting.models.spectrum import SpectrumData
5 |
6 | from .baseline_subtraction import subtract_baseline_from_split_spectrum
7 | from .filter import filter_spectrum
8 | from .despike import SpectrumDespiker
9 | from ..models.splitter import SplitSpectrum
10 | from .normalization import normalize_split_spectrum
11 |
12 |
13 | class PreProcessor(Protocol):
14 | def process_spectrum(self, spectrum: SpectrumData = None): ...
15 |
16 |
17 | class PostProcessor(Protocol):
18 | def process_spectrum(self, split_spectrum: SplitSpectrum = None): ...
19 |
20 |
21 | @dataclass
22 | class SpectrumProcessor:
23 | spectrum: SpectrumData
24 | processed: bool = False
25 | clean_spectrum: SplitSpectrum | None = None
26 |
27 | def __post_init__(self):
28 | processed_spectrum = self.process_spectrum()
29 | self.clean_spectrum = processed_spectrum
30 | self.processed = True
31 |
32 | def process_spectrum(self) -> SplitSpectrum:
33 | pre_processed_spectrum = self.pre_process_intensity(spectrum=self.spectrum)
34 | post_processed_spectra = self.post_process_spectrum(
35 | spectrum=pre_processed_spectrum
36 | )
37 | return post_processed_spectra
38 |
39 | def pre_process_intensity(self, spectrum: SpectrumData = None) -> SpectrumData:
40 | filtered_spectrum = filter_spectrum(spectrum=spectrum)
41 | despiker = SpectrumDespiker(spectrum=filtered_spectrum)
42 | return despiker.processed_spectrum
43 |
44 | def post_process_spectrum(self, spectrum: SpectrumData = None) -> SplitSpectrum:
45 | split_spectrum = SplitSpectrum(spectrum=spectrum)
46 | baseline_subtracted = subtract_baseline_from_split_spectrum(
47 | split_spectrum=split_spectrum
48 | )
49 | normalized_spectra = normalize_split_spectrum(
50 | split_spectrum=baseline_subtracted
51 | )
52 | return normalized_spectra
53 |
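
A usage sketch of the full cleaning pipeline, mirroring the pattern used in the tests further below: read one of the bundled example spectra and let SpectrumProcessor chain filtering, despiking, region splitting, baseline subtraction and normalization.

```py
from raman_fitting.config.path_settings import InternalPathSettings
from raman_fitting.imports.spectrumdata_parser import SpectrumReader
from raman_fitting.processing.post_processing import SpectrumProcessor

example_files = list(InternalPathSettings().example_fixtures.rglob("*txt"))
file = [i for i in example_files if "_pos4" in i.stem][0]

spectrum = SpectrumReader(file).spectrum
processor = SpectrumProcessor(spectrum)

clean = processor.clean_spectrum  # a SplitSpectrum with cleaned, normalized regions
print(processor.processed, list(clean.spec_regions)[:3])
```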
--------------------------------------------------------------------------------
/src/raman_fitting/types.py:
--------------------------------------------------------------------------------
1 | from typing import TypeAlias, Dict
2 |
3 | from raman_fitting.models.deconvolution.base_model import BaseLMFitModel
4 | from raman_fitting.models.fit_models import SpectrumFitModel
5 |
6 | LMFitModelCollection: TypeAlias = Dict[str, Dict[str, BaseLMFitModel]]
7 | SpectrumFitModelCollection: TypeAlias = Dict[str, Dict[str, SpectrumFitModel]]
8 |
--------------------------------------------------------------------------------
/src/raman_fitting/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/src/raman_fitting/utils/__init__.py
--------------------------------------------------------------------------------
/src/raman_fitting/utils/decorators.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 | from functools import wraps, partial
3 | from inspect import signature
4 |
5 |
6 | def decorator_with_kwargs(decorator: Callable) -> Callable:
7 | """
8 | Source: https://gist.github.com/ramonrosa/402af55633e9b6c273882ac074760426
9 | Decorator factory to give decorated decorators the skill to receive
10 | optional keyword arguments.
11 | If a decorator "some_decorator" is decorated with this function:
12 | @decorator_with_kwargs
13 | def some_decorator(decorated_function, kwarg1=1, kwarg2=2):
14 | def wrapper(*decorated_function_args, **decorated_function_kwargs):
15 | '''Modifies the behavior of decorated_function according
16 | to the value of kwarg1 and kwarg2'''
17 | ...
18 | return wrapper
19 | It will be usable in the following ways:
20 | @some_decorator
21 | def func(x):
22 | ...
23 | @some_decorator()
24 | def func(x):
25 | ...
26 | @some_decorator(kwarg1=3) # or other combinations of kwargs
27 | def func(x, y):
28 | ...
29 | :param decorator: decorator to be given optional kwargs-handling skills
30 | :type decorator: Callable
31 | :raises TypeError: if the decorator does not receive a single Callable or
32 | keyword arguments
33 | :raises TypeError: if the signature of the decorated decorator does not
34 | conform to: Callable, **keyword_arguments
35 | :return: modified decorator
36 | :rtype: Callable
37 | """
38 |
39 | @wraps(decorator)
40 | def decorator_wrapper(*args, **kwargs):
41 | if (len(kwargs) == 0) and (len(args) == 1) and callable(args[0]):
42 | return decorator(args[0])
43 | if len(args) == 0:
44 | return partial(decorator, **kwargs)
45 | raise TypeError(
46 | f"{decorator.__name__} expects either a single Callable "
47 | "or keyword arguments"
48 | )
49 |
50 | signature_values = signature(decorator).parameters.values()
51 | signature_args = [
52 | param.name for param in signature_values if param.default == param.empty
53 | ]
54 |
55 | if len(signature_args) != 1:
56 | raise TypeError(
57 | f"{decorator.__name__} signature should be of the form:\n"
58 | f"{decorator.__name__}(function: typing.Callable, "
59 | "kwarg_1=default_1, kwarg_2=default_2, ...) -> Callable"
60 | )
61 |
62 | return decorator_wrapper
63 |
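
A small, self-contained usage sketch of the pattern the docstring describes; log_call is a hypothetical decorator, not part of the package.

```py
from raman_fitting.utils.decorators import decorator_with_kwargs


@decorator_with_kwargs
def log_call(func, prefix="call"):
    def wrapper(*args, **kwargs):
        print(f"{prefix}: {func.__name__}")
        return func(*args, **kwargs)

    return wrapper


@log_call  # bare usage, no parentheses
def add(x, y):
    return x + y


@log_call(prefix="debug")  # keyword-argument usage
def mul(x, y):
    return x * y


print(add(1, 2), mul(3, 4))  # logs both calls, then prints "3 12"
```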
--------------------------------------------------------------------------------
/src/raman_fitting/utils/file_reader.py:
--------------------------------------------------------------------------------
1 | """ Class for reading in files, can be extended for other than txt formats"""
2 |
3 | from pathlib import Path
4 |
5 | import numpy as np
6 |
7 |
8 | class FileReader:
9 | def __init__(self, file_path=Path()):
10 | self._file_path = file_path
11 | self.read_in()
12 |
13 | def read_in(self):
14 | ramanshift, intensity_raw = np.array([]), np.array([])
15 | i = 0
16 | while not ramanshift.any():
17 | try:
18 | ramanshift, intensity_raw = np.loadtxt(
19 | self._file_path, usecols=(0, 1), unpack=True, skiprows=i
20 | )
21 | print(self._file_path, len(ramanshift), len(intensity_raw))
22 | self._skiprows = i
23 |                 self._read_success = True
24 | except ValueError:
25 | i += 1
26 |
27 | self.ramanshift = ramanshift
28 | self.intensity_raw = intensity_raw
29 |
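
A usage sketch of the skiprows retry in FileReader.read_in; the temporary two-column file with a text header is made up for the illustration.

```py
from pathlib import Path
from tempfile import TemporaryDirectory

from raman_fitting.utils.file_reader import FileReader

with TemporaryDirectory() as tmp:
    path = Path(tmp) / "spectrum.txt"
    path.write_text("ramanshift intensity\n1000 10.0\n1001 12.5\n1002 11.0\n")
    # the header line fails to parse, so read_in retries with skiprows=1
    reader = FileReader(file_path=path)
    print(reader.ramanshift, reader.intensity_raw)
```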
--------------------------------------------------------------------------------
/src/raman_fitting/utils/string_operations.py:
--------------------------------------------------------------------------------
1 | from lmfit.parameter import Parameter
2 |
3 |
4 | def join_prefix_suffix(prefix: str, suffix: str) -> str:
5 | prefix_ = prefix.rstrip("_")
6 | suffix_ = suffix.lstrip("_")
7 | if suffix_ in prefix:
8 | return prefix_
9 | return f"{prefix_}_{suffix_}"
10 |
11 |
12 | def prepare_text_from_param(param: Parameter) -> str:
13 | text = ""
14 | if not param:
15 | return text
16 | _ptext = ""
17 | _val = param.value
18 | _min = param.min
19 | if _min != _val:
20 | _ptext += f"{_min} < "
21 | _ptext += f"{_val}"
22 | _max = param.max
23 | if _max != _val:
24 | _ptext += f" > {_max}"
25 | text += f", center : {_ptext}"
26 | return text
27 |
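
Two quick examples worked by hand from join_prefix_suffix above:

```py
from raman_fitting.utils.string_operations import join_prefix_suffix

print(join_prefix_suffix("D1D1_", "peak"))      # "D1D1_peak"
print(join_prefix_suffix("D1D1_peak", "peak"))  # "D1D1_peak" (suffix already present)
```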
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration file for pytest and commonly used fixtures
3 | """
4 |
5 | import pytest
6 | from raman_fitting.config import settings
7 | from raman_fitting.config.path_settings import InternalPathSettings
8 |
9 | # Global fixtures
10 |
11 |
12 | @pytest.fixture(autouse=True)
13 | def tmp_raman_dir(tmp_path):
14 | d = tmp_path / "raman-fitting"
15 | d.mkdir()
16 | yield d
17 | d.rmdir()
18 |
19 |
20 | @pytest.fixture(autouse=True)
21 | def internal_paths():
22 | return InternalPathSettings()
23 |
24 |
25 | @pytest.fixture(autouse=True)
26 | def example_files(internal_paths):
27 | example_files = list(internal_paths.example_fixtures.rglob("*txt"))
28 | return example_files
29 |
30 |
31 | @pytest.fixture(autouse=True)
32 | def default_definitions(internal_paths):
33 | return settings.default_definitions
34 |
35 |
36 | @pytest.fixture(autouse=True)
37 | def default_models(internal_paths):
38 | return settings.default_models
39 |
40 |
41 | @pytest.fixture(autouse=True)
42 | def default_models_first_order(default_models):
43 | return default_models.get("first_order")
44 |
45 |
46 | @pytest.fixture(autouse=True)
47 | def default_models_second_order(default_models):
48 | return default_models.get("second_order")
49 |
--------------------------------------------------------------------------------
/tests/deconvolution_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/deconvolution_models/__init__.py
--------------------------------------------------------------------------------
/tests/deconvolution_models/test_base_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Sun Jun 6 09:35:02 2021
3 |
4 | @author: DW
5 | """
6 |
7 | import pytest
8 | from functools import partial
9 |
10 | from pydantic import ValidationError
11 |
12 | from raman_fitting.models.deconvolution.base_model import (
13 | SUBSTRATE_PEAK,
14 | BaseLMFitModel,
15 | )
16 |
17 | SUBSTRATE_PREFIX = SUBSTRATE_PEAK.split("peak")[0]
18 |
19 |
20 | def helper_get_list_components(bm):
21 |     _listcompsprefix = partial(map, lambda x: getattr(x, "prefix"))
22 | _bm_prefix = list(_listcompsprefix(bm.lmfit_model.components))
23 | return _bm_prefix
24 |
25 |
26 | def test_empty_base_model():
27 | with pytest.raises(ValidationError):
28 | BaseLMFitModel()
29 | with pytest.raises(ValidationError):
30 | BaseLMFitModel(name="Test_empty")
31 |
32 | with pytest.raises(ValidationError):
33 | BaseLMFitModel(peaks="A+B")
34 |
35 | with pytest.raises(ValidationError):
36 | BaseLMFitModel(name="Test_empty", peaks="A+B", region_name="full")
37 |
38 |
39 | def test_base_model_2peaks():
40 | bm = BaseLMFitModel(name="Test_2peaks", peaks="K2+D+G", region_name="full")
41 | assert set(helper_get_list_components(bm)) == set(["D_", "G_"])
42 | bm.add_substrate()
43 | assert set(helper_get_list_components(bm)) == set(["D_", "G_", SUBSTRATE_PREFIX])
44 | bm.remove_substrate()
45 | assert set(helper_get_list_components(bm)) == set(["D_", "G_"])
46 |
47 |
48 | def test_base_model_wrong_chars_model_name():
49 | bm = BaseLMFitModel(
50 | name="Test_wrong_chars",
51 | peaks="K2+---////+ +7 +K1111+1D+D2",
52 | region_name="full",
53 | )
54 | assert set(helper_get_list_components(bm)) == set(["D2_"])
55 | bm.add_substrate()
56 | assert set(helper_get_list_components(bm)) == set(["D2_", SUBSTRATE_PREFIX])
57 |
--------------------------------------------------------------------------------
/tests/deconvolution_models/test_base_peaks.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import pytest
3 |
4 | from pydantic import ValidationError
5 | from raman_fitting.models.deconvolution.base_peak import (
6 | BasePeak,
7 | )
8 |
9 |
10 | logger = logging.getLogger(__name__)
11 | logging.captureWarnings(True) # sends these warning to the logger
12 |
13 |
14 | def _error_message_contains(excinfo, testmsg: str, verbose: bool = False):
15 | _fltr_str = [
16 | i if i not in ["(", ")"] else " "
17 | for i in str(excinfo.value)
18 | if i.isalnum() or i in (",", ".", " ", "_", "(", ")")
19 | ]
20 | _cl_str = "".join(map(str, _fltr_str))
21 | _cl_str_split = _cl_str.split(" ")
22 | _test = all(i in _cl_str_split for i in testmsg.split(" "))
23 |
24 | if not _test:
25 | _test = any(i in _cl_str_split for i in testmsg.split(" "))
26 |
27 | if not _test or verbose:
28 | print(list(((i, i in _cl_str_split) for i in testmsg.split(" "))))
29 | print(_cl_str_split)
30 | return _test
31 |
32 |
33 | def test_basepeak_initialization():
34 | with pytest.raises(ValidationError):
35 | BasePeak()
36 | with pytest.raises(ValidationError):
37 | BasePeak(peak_name="test")
38 | with pytest.raises(ValidationError):
39 | BasePeak(peak_type="Voigt")
40 | test_peak = BasePeak(peak_name="test", peak_type="Voigt")
41 | assert test_peak.peak_name == "test"
42 |
43 |
44 | @pytest.mark.skip(reason="TODO: add field validations")
45 | def test_empty_base_class_with_kwargs_raises():
46 | eb = BasePeak(peak_type="Voigt", peak_name="test")
47 |
48 | assert eb.peak_type == "Voigt"
49 |
50 | # add in field validation str_length
51 | with pytest.raises(ValueError) as excinfo:
52 | eb.peak_name = 10 * "emptytest"
53 | assert _error_message_contains(excinfo, "value for peak_name is too long 90")
54 |
55 | # add built in field validation for peak_type
56 | with pytest.raises(ValueError) as excinfo:
57 | eb.peak_type = "VoigtLorentzian"
58 | assert _error_message_contains(
59 | excinfo,
60 | ''''Multiple options ['Lorentzian', 'Voigt'] for misspelled value "VoigtLorentzian"''',
61 | )
62 |
63 |
64 | def test_base_class_good_with_init_extra_tests():
65 | td1_kwargs = dict(
66 | peak_type="Voigt",
67 | peak_name="D1D1",
68 | param_hints={
69 | "center": {"value": 2650, "min": 2600, "max": 2750},
70 | "sigma": {"value": 60, "min": 1, "max": 200},
71 | "amplitude": {"value": 14, "min": 1e-03, "max": 100},
72 | },
73 | )
74 |
75 | td1 = BasePeak(**td1_kwargs)
76 | assert td1.peak_type == "Voigt"
77 | assert td1.peak_name == "D1D1"
78 |     peakmod = "<lmfit.Model: Model(voigt, prefix='D1D1_')>"
79 |     assert str(td1.lmfit_model) == peakmod
80 | # _class_str = f"center : 2600 < 2650 > 2750"
81 | # assertIn(_class_str, str(td1))
82 | # dont test attr setters
83 | # td1.peak_name = "R2D2"
84 | # assert td1.lmfit_model.prefix == "R2D2_"
85 |
86 |
87 | def test_base_class_good_with_init():
88 | d1_kwargs = dict(
89 | peak_name="D1D1",
90 | peak_type="Gaussian",
91 | param_hints={
92 | "center": {"value": 2650, "min": 2600, "max": 2750},
93 | "sigma": {"value": 60, "min": 1, "max": 200},
94 | "amplitude": {"value": 14, "min": 1e-03, "max": 100},
95 | },
96 | )
97 |
98 | td1 = BasePeak(**d1_kwargs)
99 | assert td1.peak_name == d1_kwargs["peak_name"]
100 |
101 |
102 | def test_base_class_good_with_init_added_method():
103 | tkwargs = dict(
104 | peak_type="Lorentzian",
105 | peak_name="D1D1",
106 | param_hints={
107 | "center": {"value": 2650, "min": 2600, "max": 2750},
108 | "sigma": {"value": 60, "min": 1, "max": 200},
109 | "amplitude": {"value": 14, "min": 1e-03, "max": 100},
110 | },
111 | )
112 |
113 | td1m = BasePeak(**tkwargs)
114 | assert td1m.peak_type == tkwargs["peak_type"]
115 |
116 |
117 | def test_base_class_good_with_attributes_and_init():
118 | tkwargs = dict(
119 | param_hints={
120 | "center": {"value": 2435, "min": 2400, "max": 2550},
121 | "sigma": {"value": 30, "min": 1, "max": 200},
122 | "amplitude": {"value": 2, "min": 1e-03, "max": 100},
123 | },
124 | peak_type="Voigt",
125 | peak_name="R2D2",
126 | )
127 |
128 | nca = BasePeak(**tkwargs)
129 | _center_value = nca.lmfit_model.param_hints["center"]["value"]
130 | assert _center_value == 2435
131 |
132 |
133 | def test_base_class_good_with_attributes_no_init():
134 | tkwargs = dict(
135 | param_hints={
136 | "center": {"value": 2435, "min": 2400, "max": 2550},
137 | "sigma": {"value": 30, "min": 1, "max": 200},
138 | "amplitude": {"value": 2, "min": 1e-03, "max": 100},
139 | },
140 | peak_type="Voigt",
141 | peak_name="R2D2",
142 | )
143 |
144 | ncni = BasePeak(**tkwargs)
145 | assert ncni.param_hints["center"].value == 2435
146 | assert ncni.lmfit_model.param_hints["center"]["value"] == 2435
147 |
148 |
149 | def test_base_class_good_with_attributes_init_collision_values():
150 | tkwargs = dict(
151 | param_hints={
152 | "center": {"value": 2435, "min": 2400, "max": 2550},
153 | "sigma": {"value": 30, "min": 1, "max": 200},
154 | "amplitude": {"value": 2, "min": 1e-03, "max": 100},
155 | },
156 | peak_type="Voigt",
157 | peak_name="R2D2",
158 | )
159 | nci = BasePeak(**tkwargs)
160 | assert nci.peak_type == "Voigt"
161 | assert nci.lmfit_model.param_hints["center"]["value"] == 2435
162 |
--------------------------------------------------------------------------------
/tests/deconvolution_models/test_fit_models.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import pytest
4 |
5 | from raman_fitting.models.fit_models import SpectrumFitModel
6 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader
7 | from raman_fitting.processing.post_processing import SpectrumProcessor
8 |
9 |
10 | @pytest.fixture
11 | def clean_spec(example_files):
12 | file = [i for i in example_files if "_pos4" in i.stem][0]
13 | specread = SpectrumReader(file)
14 |
15 | spectrum_processor = SpectrumProcessor(specread.spectrum)
16 | clean_spec_1st_order = spectrum_processor.clean_spectrum.spec_regions[
17 | "savgol_filter_raw_region_first_order"
18 | ]
19 | clean_spec_1st_order.region_name = "first_order"
20 | return clean_spec_1st_order
21 |
22 |
23 | def test_fit_first_order(clean_spec, default_models):
24 | spectrum = clean_spec
25 | test_component = "center"
26 |
27 | for model_name, test_model in default_models["first_order"].items():
28 | # with subTest(model_name=model_name, test_model=test_model):
29 | spec_fit = SpectrumFitModel(
30 | **{"spectrum": spectrum, "model": test_model, "region": "first_order"}
31 | )
32 | spec_fit.run_fit()
33 | for component in test_model.lmfit_model.components:
34 | # with subTest(component=component):
35 | peak_component = f"{component.prefix}{test_component}"
36 | fit_value = spec_fit.fit_result.best_values[peak_component]
37 | init_value = spec_fit.fit_result.init_values[peak_component]
38 | assert math.isclose(fit_value, init_value, rel_tol=0.05)
39 | assert spec_fit.fit_result.success
40 |
--------------------------------------------------------------------------------
/tests/deconvolution_models/test_peak_validation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/deconvolution_models/test_peak_validation.py
--------------------------------------------------------------------------------
/tests/delegating/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/tests/delegating/test_main_delegator.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from raman_fitting.config.path_settings import RunModes
4 | from raman_fitting.delegating.main_delegator import MainDelegator
5 |
6 |
7 | @pytest.fixture(scope="module")
8 | def delegator():
9 | return MainDelegator(run_mode=RunModes.PYTEST)
10 |
11 |
12 | def test_initialize_models(delegator):
13 | assert "first_order" in delegator.lmfit_models
14 | assert "first_order" in delegator.selected_models
15 | with pytest.raises(KeyError):
16 | delegator.select_fitting_model("no_name", "no model")
17 |
18 |
19 | def test_delegator_index(delegator):
20 | assert delegator.index
21 | assert len(delegator.index.raman_files) == 5
22 | selection = delegator.select_samples_from_index()
23 | assert len(delegator.index.raman_files) == len(selection)
24 |
25 |
26 | def test_main_run(delegator):
27 | assert delegator.results
28 |
--------------------------------------------------------------------------------
/tests/empty.toml:
--------------------------------------------------------------------------------
1 | # Empty config file
2 |
--------------------------------------------------------------------------------
/tests/exporting/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Fri May 28 09:04:40 2021
5 |
6 | @author: zmg
7 | """
8 |
--------------------------------------------------------------------------------
/tests/exporting/test_plotting.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | #!/usr/bin/env python3
3 | # -*- coding: utf-8 -*-
4 | """
5 | Created on Fri May 14 09:29:16 2021
6 |
7 | @author: zmg
8 | """
9 | # flake8: noqa
10 |
11 | import pytest
12 |
13 | from raman_fitting.models.deconvolution.init_models import InitializeModels
14 | from raman_fitting.exports.plot_formatting import (
15 | get_cmap_list,
16 | assign_colors_to_peaks,
17 | DEFAULT_COLOR,
18 | COLOR_BLACK,
19 | )
20 |
21 |
22 | # class PeakModelAnnotation(unittest.TestCase):
23 | @pytest.fixture()
24 | def initialized_models():
25 | return InitializeModels()
26 |
27 |
28 | def test_get_cmap_list():
29 |     assert get_cmap_list(0) is None
30 | _cmap = get_cmap_list(50)
31 | assert _cmap == [DEFAULT_COLOR] * 50
32 | _cmap = get_cmap_list(5)
33 | assert len(_cmap) >= 5
34 | _cmap1 = get_cmap_list(5, default_color=COLOR_BLACK)
35 | assert _cmap1 == [COLOR_BLACK] * 5
36 |
37 |
38 | def test_assign_colors_to_peaks(initialized_models):
39 | for order_type, model_collection in initialized_models.lmfit_models.items():
40 | for model_name, model in model_collection.items():
41 | annotated_models = assign_colors_to_peaks(model.lmfit_model.components)
42 | prefixes = set([i.prefix for i in model.lmfit_model.components])
43 | assert prefixes == set(annotated_models.keys())
44 |
--------------------------------------------------------------------------------
/tests/indexing/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/tests/indexing/test_filename_parser.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from raman_fitting.imports.models import RamanFileInfo
4 | from raman_fitting.imports.samples.sample_id_helpers import (
5 | overwrite_sample_id_from_mapper,
6 | overwrite_sample_group_id_from_parts,
7 | )
8 |
9 |
10 | from raman_fitting.imports.samples.sample_id_helpers import (
11 | parse_string_to_sample_id_and_position,
12 | )
13 |
14 | example_parse_fixture = {
15 | "errEMP2_1.txt": ("errEMP2", 1),
16 | "errTS2_pos1.txt": ("errTS2", 1),
17 | "Si_spectrum01.txt": ("Si", 1),
18 | "testDW38C_pos1.txt": ("testDW38C", 1),
19 | "testDW38C_pos2.txt": ("testDW38C", 2),
20 | "testDW38C_pos3.txt": ("testDW38C", 3),
21 | "testDW38C_pos4.txt": ("testDW38C", 4),
22 | "DW_AB_CD-EF_GE_pos3": ("DW_AB_CD-EF_GE", 3),
23 | "DW99-pos3": ("DW99", 3),
24 | "Si": ("Si", 0),
25 | }
26 |
27 |
28 | # class TestFilenameParser(unittest.TestCase):
29 | result_attr = "parse_result"
30 | sample_id_name_mapper = {}
31 | sGrp_name_mapper = {}
32 |
33 |
34 | @pytest.fixture()
35 | def path_parsers(example_files):
36 | path_parsers_ = []
37 | for fn in example_files:
38 | path_parsers_.append(RamanFileInfo(**{"file": fn}))
39 | return path_parsers_
40 |
41 |
42 | def test_ramanfileinfo(path_parsers):
43 | assert all(isinstance(i, RamanFileInfo) for i in path_parsers)
44 |
45 |
46 | def test_sample_id_name_mapper():
47 | for k, val in sample_id_name_mapper.items():
48 | _mapval = overwrite_sample_id_from_mapper(k, sample_id_name_mapper)
49 | assert _mapval == val
50 |
51 |
52 | def test_overwrite_sample_id_from_mapper():
53 | assert "TEST" == overwrite_sample_group_id_from_parts([], "TEST", sGrp_name_mapper)
54 | for k, val in sGrp_name_mapper.items():
55 | empty_path_parts = RamanFileInfo(file=f"{k}/TEST.txt")
56 | assert val == overwrite_sample_group_id_from_parts(
57 | empty_path_parts.parts, "TEST", sGrp_name_mapper
58 | )
59 |
60 |
61 | def test_parse_string_to_sample_id_and_position():
62 | for file, _expected in example_parse_fixture.items():
63 | assert parse_string_to_sample_id_and_position(file) == _expected
64 |
--------------------------------------------------------------------------------
/tests/indexing/test_indexer.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from raman_fitting.config.path_settings import (
4 | get_run_mode_paths,
5 | RunModes,
6 | )
7 | from raman_fitting.imports.files.file_indexer import (
8 | RamanFileIndex,
9 | initialize_index_from_source_files,
10 | )
11 | from raman_fitting.imports.models import RamanFileInfo
12 |
13 | run_mode = RunModes.PYTEST
14 | run_paths = get_run_mode_paths(run_mode)
15 |
16 |
17 | @pytest.fixture
18 | def index(example_files, internal_paths, tmp_raman_dir):
19 | pytest_fixtures_files = list(internal_paths.pytest_fixtures.rglob("*txt"))
20 | index_file = internal_paths.temp_index_file
21 | all_test_files = example_files + pytest_fixtures_files
22 | index = initialize_index_from_source_files(
23 | index_file=index_file, files=all_test_files, force_reindex=True
24 | )
25 | return index
26 |
27 |
28 | def test_index_make_examples(index, example_files):
29 | assert isinstance(index, RamanFileIndex)
30 | assert isinstance(index.raman_files[0], RamanFileInfo)
31 | assert len(index.dataset) > 1
32 | assert len(index.dataset) == len(example_files)
33 |
34 |
35 | # @unittest.skip("export_index not yet implemented")
36 | def test_load_index(index):
37 |     assert index.index_file.exists()
38 | new_index = RamanFileIndex(index_file=index.index_file, force_reindex=False)
39 | assert isinstance(new_index, RamanFileIndex)
40 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyPyDavid/raman-fitting/554760daa97161f7de38b69a2afd4897d2020976/tests/models/__init__.py
--------------------------------------------------------------------------------
/tests/models/test_base_peak.py:
--------------------------------------------------------------------------------
1 | from raman_fitting.models.deconvolution.base_peak import BasePeak
2 |
3 |
4 | def test_initialize_base_peaks(
5 | default_definitions, default_models_first_order, default_models_second_order
6 | ):
7 | peaks = {}
8 |
9 | peak_items = {
10 | **default_definitions["first_order"]["peaks"],
11 | **default_definitions["second_order"]["peaks"],
12 | }.items()
13 | for k, v in peak_items:
14 | peaks.update({k: BasePeak(**v)})
15 |
16 | peak_d = BasePeak(**default_definitions["first_order"]["peaks"]["D"])
17 | assert (
18 | peak_d.peak_name
19 | == default_definitions["first_order"]["peaks"]["D"]["peak_name"]
20 | )
21 | assert (
22 | peak_d.peak_type
23 | == default_definitions["first_order"]["peaks"]["D"]["peak_type"]
24 | )
25 | assert (
26 | peak_d.lmfit_model.components[0].prefix
27 | == default_definitions["first_order"]["peaks"]["D"]["peak_name"] + "_"
28 | )
29 | assert (
30 | peak_d.param_hints["center"].value
31 | == default_definitions["first_order"]["peaks"]["D"]["param_hints"]["center"][
32 | "value"
33 | ]
34 | )
35 |
--------------------------------------------------------------------------------
/tests/models/test_calculate_params.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from raman_fitting.models.post_deconvolution.calculate_params import ratio_funcs
4 |
5 |
6 | result_first = {"D_center": 1, "G_center": 2, "D1D1_center": 3}
7 | first_peaks = "G+D+D2+D3+D4+D5"
8 | result_second = (
9 | {"D4D4 +D1D1+GD1+D2D2"},
10 | {"D_center": 1, "G_center": 2, "D1D1_center": 3},
11 | )
12 | var_name = "peak"
13 |
14 |
15 | @pytest.fixture
16 | def list_of_ratio_funcs():
17 | return list(ratio_funcs)
18 |
19 |
20 | @pytest.fixture
21 | def results_first(default_models_first_order):
22 | return {
23 | k: val.get("value")
24 | for k, val in default_models_first_order[
25 | "5peaks"
26 | ].lmfit_model.param_hints.items()
27 | if "value" in val
28 | }
29 |
30 |
31 | @pytest.fixture
32 | def results_second(default_models_second_order):
33 | return {
34 | k: val.get("value")
35 | for k, val in default_models_second_order[
36 | "2nd_4peaks"
37 | ].lmfit_model.param_hints.items()
38 | if "value" in val
39 | }
40 |
41 |
42 | def test_calculate_params_keyerror(list_of_ratio_funcs, results_first):
43 | var_name = "no_var"
44 | with pytest.raises(KeyError):
45 | list_of_ratio_funcs[0](results_first, var_name)
46 |
47 |
48 | def test_calculate_params_from_results(
49 | results_first, results_second, list_of_ratio_funcs
50 | ):
51 | combined_results = {**results_first, **results_second}
52 |
53 | prefix = ""
54 | var_name = "center"
55 |
56 | results = {}
57 | for ratio_func in list_of_ratio_funcs:
58 | label, ratio = ratio_func(combined_results, var_name, prefix=prefix)
59 |
60 | func = ratio_func.__name__
61 | results[func] = {"label": label, "ratio": ratio}
62 | assert results
63 | assert results["ratio_d_to_g"]["ratio"] < 1
64 | assert results["ratio_d_to_g"]["label"] == "D/G"
65 | for k, val in results.items():
66 | assert val["label"]
67 | assert val["ratio"] > 0
68 |
--------------------------------------------------------------------------------
/tests/models/test_fit_models.py:
--------------------------------------------------------------------------------
1 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader
2 | from raman_fitting.models.fit_models import SpectrumFitModel
3 | from raman_fitting.processing.post_processing import SpectrumProcessor
4 |
5 |
6 | def test_fit_model(example_files, default_models_first_order):
7 | file = [i for i in example_files if "_pos4" in i.stem][0]
8 |
9 | specread = SpectrumReader(file)
10 |
11 | spectrum_processor = SpectrumProcessor(specread.spectrum)
12 | clean_spec_1st_order = spectrum_processor.clean_spectrum.spec_regions[
13 | "savgol_filter_raw_region_first_order"
14 | ]
15 | clean_spec_1st_order.region_name = "first_order"
16 |
17 | model_2peaks = default_models_first_order["2peaks"]
18 | spec_fit = SpectrumFitModel(
19 | spectrum=clean_spec_1st_order,
20 | model=model_2peaks,
21 | region=clean_spec_1st_order.region_name,
22 | )
23 | spec_fit.run_fit()
24 | assert spec_fit.fit_result.success
25 | assert spec_fit.fit_result.best_values
26 | assert spec_fit.param_results["ratios"]["center"]["ratio_d_to_g"]["ratio"] < 1
27 | assert spec_fit.param_results["ratios"]["center"]["ratio_la_d_to_g"]["ratio"] < 10
28 | d_amp_ = spec_fit.fit_result.best_values["D_amplitude"]
29 | g_amp_ = spec_fit.fit_result.best_values["G_amplitude"]
30 | dg_ratio = d_amp_ / g_amp_
31 | assert (
32 | spec_fit.param_results["ratios"]["amplitude"]["ratio_d_to_g"]["ratio"]
33 | == dg_ratio
34 | )
35 |
--------------------------------------------------------------------------------
/tests/processing/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Fri May 28 09:04:45 2021
5 |
6 | @author: zmg
7 | """
8 |
--------------------------------------------------------------------------------
/tests/processing/test_cleaner.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 | from raman_fitting.processing.despike import SpectrumDespiker
5 |
6 |
7 | int_arrays = (
8 | np.array([1, 2, 3, 4, 5]),
9 | np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
10 | np.array([2, 2, 2, 2, 2, 2, 30, 20, 2, 2, 2, 2, 2, 2])
11 | )
12 |
13 | @pytest.mark.parametrize('array', int_arrays)
14 | def test_despiker(array):
15 | despiker = SpectrumDespiker.model_construct()
16 |
17 | desp_int = despiker.process_intensity(array)
18 | assert len(desp_int) == len(array)
19 |
--------------------------------------------------------------------------------
/tests/processing/test_spectrum_constructor.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from raman_fitting.imports.spectrumdata_parser import SpectrumReader
4 | from raman_fitting.models.deconvolution.spectrum_regions import RegionNames
5 |
6 |
7 | def test_spectrum_data_loader_empty():
8 | with pytest.raises(ValueError):
9 | SpectrumReader("empty.txt")
10 |
11 |
12 | def test_spectrum_data_loader_file(example_files):
13 | for file in example_files:
14 | sprdr = SpectrumReader(file)
15 | assert len(sprdr.spectrum.intensity) == 1600
16 | assert len(sprdr.spectrum.ramanshift) == 1600
17 | assert sprdr.spectrum.source == file
18 | assert sprdr.spectrum.region_name == RegionNames.full
19 |
--------------------------------------------------------------------------------
/tests/test_fixtures/empty-lines_1.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | end
15 |
--------------------------------------------------------------------------------
/todos.md:
--------------------------------------------------------------------------------
1 | # List of IDEAs for the raman fitting code
2 | ```py
3 | # IDEA change version definition
4 |
5 | # IDEA list:
6 | # improved logger, each module needs a getlogger(name)
7 | # IDEA future daemonize the fitting process for using the package and dropping files in the datafiles folder
8 | # IDEA add docs with Sphinx, readthedocs
9 | # IDEA improve AsyncIO into main delegator processes
10 | # IDEA fix plotting because of DeprecationWarning in savefig
11 | # IDEA add database for spectrum data storage
12 | # IDEA future GUI webinterface
13 |
14 | # IDEA improve fitting loop so that starting parameters from modelX and modelX+Si are shared, faster...
15 | ```
16 |
--------------------------------------------------------------------------------