├── tests
    ├── __init__.py
    ├── test.json
    ├── test.csv
    ├── test_datasets.py
    ├── test_utils.py
    ├── test_duration_estimator.py
    ├── test_nr_transform.py
    ├── test_state_space.py
    ├── test_roundtrip.py
    ├── test_cohort_estimator.py
    └── test_model.py
├── examples
    ├── __init__.py
    ├── python
    │   ├── __init__.py
    │   ├── README.md
    │   ├── characterize_datasets.py
    │   ├── deterministic_paths.py
    │   ├── state_space_operations.py
    │   ├── compare_estimators.py
    │   ├── adjust_nr_state.py
    │   ├── matrix_set_operations.py
    │   ├── matrix_set_lendingclub.py
    │   ├── fix_multiperiod_matrix.py
    │   ├── estimate_matrix.py
    │   ├── matrix_lendingclub.py
    │   ├── credit_curves.py
    │   ├── example_list.csv
    │   ├── generate_full_multiperiod_set.py
    │   ├── matrix_operations.py
    │   ├── empirical_transition_matrix.py
    │   ├── matrix_from_duration_data.py
    │   ├── data_cleaning_example.py
    │   └── matrix_from_cohort_data.py
    ├── sankey.png
    ├── estimation.png
    ├── overview.png
    ├── scatterplot.png
    ├── scatterplot2.png
    ├── credit_curves.png
    ├── single_entity.png
    ├── TransitionMatrix.png
    ├── sampled_histories.png
    ├── scale_conversions.png
    ├── monthly_credit_curves.png
    ├── transition_probabilities.png
    ├── JLT.json
    └── JLT.csv
├── test.json
├── test.csv
├── transitionMatrix
    ├── generators
    │   └── __init__.py
    ├── statespaces
    │   └── __init__.py
    ├── estimators
    │   ├── kaplan_meier_estimator.py
    │   ├── simple_estimator.py
    │   └── __init__.py
    ├── utils
    │   ├── __init__.py
    │   └── converters.py
    └── creditratings
    │   └── creditcurve.py
├── docs
    ├── source
    │   ├── changelog.rst
    │   ├── _static
    │   │   ├── Architecture_Overview.png
    │   │   └── custom.css
    │   ├── simple_estimator.rst
    │   ├── modules.rst
    │   ├── transitionMatrix.generators.rst
    │   ├── aalen-johansen_estimator.rst
    │   ├── transitionMatrix.statespaces.rst
    │   ├── transitionMatrix.visualization.rst
    │   ├── preprocessing.rst
    │   ├── credit_ratings.rst
    │   ├── transitionMatrix.utils.rst
    │   ├── postprocessing.rst
    │   ├── state_spaces.rst
    │   ├── federation.rst
    │   ├── transitionMatrix.creditratings.rst
    │   ├── data_generators.rst
    │   ├── withdrawn_ratings.rst
    │   ├── datasets.rst
    │   ├── credit_curves.rst
    │   ├── index.rst
    │   ├── transitionMatrix.rst
    │   ├── example_with_jlt.rst
    │   ├── cohort_estimator.rst
    │   ├── transitionMatrix.estimators.rst
    │   ├── multi-period_transitions.rst
    │   ├── examples.rst
    │   ├── visualization.rst
    │   ├── testing.rst
    │   ├── predefined_rating_scales.rst
    │   ├── estimators.rst
    │   ├── cohorts.rst
    │   ├── basic_operations.rst
    │   ├── roadmap.rst
    │   ├── description.rst
    │   └── data_formats.rst
    └── Makefile
├── .github
    └── FUNDING.yml
├── requirements.txt
├── datasets
    ├── nr_test_case.xlsx
    ├── test.csv
    ├── JLT.csv
    ├── JLT.json
    ├── dataset_list.csv
    ├── synthetic_data1.csv
    ├── synthetic_data.csv
    ├── sp_1981-2016.csv
    ├── sp 2017.csv
    └── scenario_data.csv
├── justfile
├── test_upload.sh
├── Makefile
├── MANIFEST.in
├── setup.cfg
├── requirements-dev.txt
├── test.py
├── .readthedocs.yaml
├── run_examples.py
├── .gitignore
├── setup.py
├── README.md
└── CHANGELOG.rst


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test.json:
--------------------------------------------------------------------------------
1 | [[1.0,0.0],[0.0,1.0]]


--------------------------------------------------------------------------------
/tests/test.json:
--------------------------------------------------------------------------------
1 | [[1.0,0.0],[0.0,1.0]]


--------------------------------------------------------------------------------
/test.csv:
--------------------------------------------------------------------------------
1 | 0,1
2 | 1.0,0.0
3 | 0.0,1.0
4 | 


--------------------------------------------------------------------------------
/transitionMatrix/generators/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/transitionMatrix/statespaces/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/test.csv:
--------------------------------------------------------------------------------
1 | 0,1
2 | 1.0,0.0
3 | 0.0,1.0
4 | 


--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../CHANGELOG.rst


--------------------------------------------------------------------------------
/examples/python/__init__.py:
--------------------------------------------------------------------------------
1 | # init funny all those empty inits
2 | 
3 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [open-risk]
4 | 
5 | 


--------------------------------------------------------------------------------
/examples/sankey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/sankey.png


--------------------------------------------------------------------------------
/examples/estimation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/estimation.png


--------------------------------------------------------------------------------
/examples/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/overview.png


--------------------------------------------------------------------------------
/examples/scatterplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/scatterplot.png


--------------------------------------------------------------------------------
/examples/scatterplot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/scatterplot2.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Jinja2
2 | matplotlib
3 | numpy
4 | pandas
5 | Pillow
6 | requests
7 | scipy
8 | statsmodels
9 | 


--------------------------------------------------------------------------------
/datasets/nr_test_case.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/datasets/nr_test_case.xlsx


--------------------------------------------------------------------------------
/examples/credit_curves.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/credit_curves.png


--------------------------------------------------------------------------------
/examples/single_entity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/single_entity.png


--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
1 | docs:
2 |    sphinx-build docs/source docs/build/html
3 | 
4 | show:
5 |    start docs/build/html/index.html


--------------------------------------------------------------------------------
/examples/TransitionMatrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/TransitionMatrix.png


--------------------------------------------------------------------------------
/examples/sampled_histories.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/sampled_histories.png


--------------------------------------------------------------------------------
/examples/scale_conversions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/scale_conversions.png


--------------------------------------------------------------------------------
/examples/monthly_credit_curves.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/monthly_credit_curves.png


--------------------------------------------------------------------------------
/examples/transition_probabilities.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/transition_probabilities.png


--------------------------------------------------------------------------------
/docs/source/_static/Architecture_Overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/docs/source/_static/Architecture_Overview.png


--------------------------------------------------------------------------------
/test_upload.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | rm -rf dist/*  # clear old artifacts so only the fresh build is uploaded
3 | python3 setup.py sdist bdist_wheel  # build sdist + wheel; PyPI no longer accepts bdist_egg uploads
4 | twine upload --repository-url https://test.pypi.org/legacy/ dist/*  # publish to TestPyPI, not the production index
5 | 


--------------------------------------------------------------------------------
/datasets/test.csv:
--------------------------------------------------------------------------------
 1 | ID,Time,State
 2 | 1,4,1
 3 | 2,3,1
 4 | 3,3,0
 5 | 4,4,1
 6 | 5,2,1
 7 | 6,0,1
 8 | 7,2,1
 9 | 1,14,2
10 | 2,13,2
11 | 3,13,1
12 | 4,14,2
13 | 5,12,2
14 | 6,10,2
15 | 7,12,2


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | autopep8:
2 | 	autopep8 --ignore E501,E241,W690 --in-place --recursive --aggressive transitionMatrix/
3 | 
4 | lint:
5 | 	flake8 transitionMatrix
6 | 
7 | autolint: autopep8 lint
8 | 
9 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include examples *
2 | recursive-include datasets *
3 | recursive-include tests *
4 | include datasets/*.csv
5 | include datasets/*.json
6 | include MANIFEST.in
7 | include LICENSE.txt
8 | include description.rst


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | license_file = LICENSE.txt
 3 | 
 4 | [sdist]
 5 | formats = gztar
 6 | 
 7 | [bdist_wheel]
 8 | universal=1
 9 | 
10 | [tool:pytest]
11 | norecursedirs = .* docs build docs examples wheel
12 | testpaths = tests


--------------------------------------------------------------------------------
/docs/source/simple_estimator.rst:
--------------------------------------------------------------------------------
1 | Simple Estimator
2 | ========================
3 | 
4 | The estimation of a transition matrix is one of the core functionalities of transitionMatrix. The two main estimators currently implemented are:
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
 1 | API
 2 | ==============================
 3 | 
 4 | The transitionMatrix package structure and API.
 5 | 
 6 | .. warning:: The library is still being expanded / refactored. Significant structure and API changes are likely.
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 2
10 | 
11 |    transitionMatrix


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | Jinja2
 2 | matplotlib
 3 | numpy
 4 | pandas
 5 | Pillow
 6 | requests
 7 | scipy
 8 | statsmodels
 9 | Sphinx
10 | sphinx-rtd-theme
11 | sphinxcontrib-applehelp
12 | sphinxcontrib-devhelp
13 | sphinxcontrib-htmlhelp
14 | sphinxcontrib-jsmath
15 | sphinxcontrib-qthelp
16 | sphinxcontrib-serializinghtml
17 | pytest


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.generators.rst:
--------------------------------------------------------------------------------
 1 | Generators SubPackage
 2 | ============================
 3 | 
 4 | This subpackage implements test data generation
 5 | 
 6 | transitionMatrix.generators contents
 7 | ---------------------------------------------
 8 | 
 9 | .. automodule:: transitionMatrix.generators.dataset_generators
10 |     :members:
11 |     :undoc-members:
12 |     :show-inheritance:
13 | 
14 | 


--------------------------------------------------------------------------------
/examples/JLT.json:
--------------------------------------------------------------------------------
1 | [[0.891,0.0963,0.0078,0.0019,0.003,0.0,0.0,0.0],[0.0086,0.901,0.0747,0.0099,0.0029,0.0029,0.0,0.0],[0.0009,0.0291,0.8894,0.0649,0.0101,0.0045,0.0,0.0009],[0.0006,0.0043,0.0656,0.8427,0.0644,0.016,0.0018,0.0045],[0.0004,0.0022,0.0079,0.0719,0.7764,0.1043,0.0127,0.0241],[0.0,0.0019,0.0031,0.0066,0.0517,0.8246,0.0435,0.0685],[0.0,0.0,0.0116,0.0116,0.0203,0.0754,0.6493,0.2319],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0]]


--------------------------------------------------------------------------------
/docs/source/aalen-johansen_estimator.rst:
--------------------------------------------------------------------------------
1 | Aalen-Johansen Estimator
2 | ========================
3 | 
4 | The Aalen-Johansen estimator is a multi-state (matrix) version of the Kaplan–Meier estimator for the hazard of a survival process. The estimator can be used to estimate the transition probability matrix of a Markov process with a finite number of states. `See <https://www.openriskmanual.org/wiki/Aalen-Johansen_Estimator>`_
5 | 


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.statespaces.rst:
--------------------------------------------------------------------------------
 1 | State Spaces SubPackage
 2 | ============================
 3 | 
 4 | This subpackage implements state space functionality
 5 | 
 6 | 
 7 | transitionMatrix.statespaces.statespace module
 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 9 | 
10 | .. automodule:: transitionMatrix.statespaces.statespace
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:


--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
 1 | /* override table width restrictions */
 2 | @media screen and (min-width: 767px) {
 3 | 
 4 |   .wy-table-responsive table td {
 5 |     /* !important prevents the common CSS stylesheets from
 6 |        overriding this as on RTD they are loaded after this stylesheet */
 7 |     white-space: normal !important;
 8 |   }
 9 | 
10 |   .wy-table-responsive {
11 |     overflow: visible !important;
12 |   }
13 | 
14 | }


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.visualization.rst:
--------------------------------------------------------------------------------
 1 | Visualization subpackage
 2 | ============================
 3 | 
 4 | This subpackage implements visualization functionality
 5 | 
 6 | .. warning:: not yet implemented
 7 | 
 8 | transitionMatrix.visualization contents
 9 | ---------------------------------------------
10 | 
11 | .. automodule:: transitionMatrix.visualization
12 |     :members:
13 |     :undoc-members:
14 |     :show-inheritance:
15 | 


--------------------------------------------------------------------------------
/datasets/JLT.csv:
--------------------------------------------------------------------------------
 1 | 0,1,2,3,4,5,6,7
 2 | 0.891,0.0963,0.0078,0.0019,0.003,0.0,0.0,0.0
 3 | 0.0086,0.901,0.0747,0.0099,0.0029,0.0029,0.0,0.0
 4 | 0.0009,0.0291,0.8894,0.0649,0.0101,0.0045,0.0,0.0009
 5 | 0.0006,0.0043,0.0656,0.8427,0.0644,0.016,0.0018,0.0045
 6 | 0.0004,0.0022,0.0079,0.0719,0.7764,0.1043,0.0127,0.0241
 7 | 0.0,0.0019,0.0031,0.0066,0.0517,0.8246,0.0435,0.0685
 8 | 0.0,0.0,0.0116,0.0116,0.0203,0.0754,0.6493,0.2319
 9 | 0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10 | 


--------------------------------------------------------------------------------
/examples/JLT.csv:
--------------------------------------------------------------------------------
 1 | 0,1,2,3,4,5,6,7
 2 | 0.891,0.0963,0.0078,0.0019,0.003,0.0,0.0,0.0
 3 | 0.0086,0.901,0.0747,0.0099,0.0029,0.0029,0.0,0.0
 4 | 0.0009,0.0291,0.8894,0.0649,0.0101,0.0045,0.0,0.0009
 5 | 0.0006,0.0043,0.0656,0.8427,0.0644,0.016,0.0018,0.0045
 6 | 0.0004,0.0022,0.0079,0.0719,0.7764,0.1043,0.0127,0.0241
 7 | 0.0,0.0019,0.0031,0.0066,0.0517,0.8246,0.0435,0.0685
 8 | 0.0,0.0,0.0116,0.0116,0.0203,0.0754,0.6493,0.2319
 9 | 0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10 | 


--------------------------------------------------------------------------------
/docs/source/preprocessing.rst:
--------------------------------------------------------------------------------
 1 | Preprocessing
 2 | ===================
 3 | 
 4 | The preprocessing stage includes preparatory steps leading up to the matrix :ref:`Estimation` to produce a transition matrix (or matrix set).
 5 | 
 6 | The precise steps required depend on the sources of data, the nature of data, use-specific requirements (best practices, regulation, etc.) and, not least, the desired estimation method.
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 2
10 | 
11 |    data_formats
12 |    state_spaces
13 |    cohorts


--------------------------------------------------------------------------------
/docs/source/credit_ratings.rst:
--------------------------------------------------------------------------------
 1 | Credit Ratings
 2 | ======================
 3 | 
 4 | Working with credit data is a core use case of transitionMatrix. Functionality that is specific to credit ratings is generally grouped in the **credit ratings** subpackage (although the distinction of what is generic and what credit specific is not always clear).
 5 | 
 6 | The following sections document various credit rating related activities. General documentation about `credit rating systems <Category:Credit Rating System>`_
 7 | 
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    predefined_rating_scales
14 |    withdrawn_ratings
15 |    credit_curves
16 | 


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.utils.rst:
--------------------------------------------------------------------------------
 1 | Utilities SubPackage
 2 | =================================
 3 | 
 4 | This subpackage collects various utilities
 5 | 
 6 | transitionMatrix.utils.converters module
 7 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 8 | 
 9 | .. automodule:: transitionMatrix.utils.converters
10 |     :members:
11 |     :undoc-members:
12 |     :show-inheritance:
13 | 
14 | 
15 | transitionMatrix.utils.preprocessing module
16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 | 
18 | .. automodule:: transitionMatrix.utils.preprocessing
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = transitionMatrix
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/source/postprocessing.rst:
--------------------------------------------------------------------------------
 1 | Post-processing
 2 | ===================
 3 | 
 4 | The post-processing stage includes steps and activities after the estimation of a transition matrix. The precise steps required depend on specific circumstances but might involve some of the following:
 5 | 
 6 | 
 7 | * "Fixing" a matrix by correcting deficiencies linked to data quality
 8 | * Obtaining the infinitesimal generator of a matrix, a powerful tool for further analysis
 9 | * Working with multi-period matrices
10 | * Visualizing transition datasets and transition frequencies
11 | 
12 | .. toctree::
13 |    :maxdepth: 2
14 | 
15 |    basic_operations
16 |    example_with_jlt
17 |    multi-period_transitions
18 |    visualization
19 | 


--------------------------------------------------------------------------------
/docs/source/state_spaces.rst:
--------------------------------------------------------------------------------
 1 | State Spaces
 2 | ==============================
 3 | 
 4 | A State Space is a fundamental concept in probability theory and computer science representing the possible configurations for a modelled system.
 5 | 
 6 | The StateSpace object stores a state space structure as a List of tuples. The first two elements of each tuple contain the index (base-0) and label of the state space respectively.
 7 | 
 8 | Additional fields are reserved for further characterisation.
 9 | 
10 | 
11 | Example: Map credit ratings between systems
12 | """"""""""""""""""""""""""""""""""""""""""""
13 | 
14 | * Script: state_space_operations.py
15 | 
16 | Example workflows for converting data from one credit rating system to another using an established mapping table
17 | 
18 | 


--------------------------------------------------------------------------------
/docs/source/federation.rst:
--------------------------------------------------------------------------------
 1 | Federation
 2 | ======================
 3 | 
 4 | Credit Rating Ontology
 5 | ----------------------
 6 | 
 7 | The Credit Ratings Ontology is a framework that aims to represent and categorize knowledge about Credit Rating Agencies and related data (Credit Ratings) using semantic web information technologies.
 8 | 
 9 | This is a new project, related resources can be found here:
10 | 
11 | * `Online documentation <https://www.openriskmanual.org/ns/cro/index-en.html>`_
12 | * `Blog post <https://www.openriskmanagement.com/risk-management-ontologies/>`_
13 | * `Course <https://www.openriskacademy.com/course/view.php?id=60>`_
14 | * `Repo with ontology usage examples <https://github.com/open-risk/Academy-Course-PYT26060>`_
15 | 
16 | .. note:: transitionMatrix functionality to federate semantically annotated credit data is planned


--------------------------------------------------------------------------------
/examples/python/README.md:
--------------------------------------------------------------------------------
 1 | Example Scripts
 2 | ===============
 3 | 
 4 | - [adjust_nr_state.py](adjust_nr_state.py)
 5 | - [credit_curves.py](credit_curves.py)
 6 | - [empirical_transition_matrix.py](empirical_transition_matrix.py)
 7 | - [fix_multiperiod_matrix.py](fix_multiperiod_matrix.py)
 8 | - [generate_full_multiperiod_set.py](generate_full_multiperiod_set.py)
 9 | - [generate_synthetic_data.py](generate_synthetic_data.py)
10 | - [generate_visuals.py](generate_visuals.py)
11 | - [matrix_from_cohort_data.py](matrix_from_cohort_data.py)
12 | - [matrix_from_duration_data.py](matrix_from_duration_data.py)
13 | - [matrix_lendingclub.py](matrix_lendingclub.py)
14 | - [matrix_operations.py](matrix_operations.py)
15 | - [matrix_set_lendingclub.py](matrix_set_lendingclub.py)
16 | - [matrix_set_operations.py](matrix_set_operations.py)
17 | - [state_space_operations.py](state_space_operations.py)
18 | 


--------------------------------------------------------------------------------
/transitionMatrix/estimators/kaplan_meier_estimator.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from __future__ import print_function
16 | 
17 | from transitionMatrix.estimators import DurationEstimator
18 | 
19 | 
20 | class KaplanMeierEstimator(DurationEstimator):  # placeholder: subclasses DurationEstimator without adding anything
21 |     pass  # empty body, so all behaviour is inherited from DurationEstimator
22 | 


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.creditratings.rst:
--------------------------------------------------------------------------------
 1 | Credit Ratings SubPackage
 2 | ============================
 3 | 
 4 | This subpackage collects credit rating specific functionality
 5 | 
 6 | 
 7 | transitionMatrix.creditratings.creditcurve module
 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 9 | 
10 | .. automodule:: transitionMatrix.creditratings.creditcurve
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | 
16 | transitionMatrix.creditratings.creditsystems module
17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | 
19 | .. automodule:: transitionMatrix.creditratings.creditsystems
20 |     :members:
21 |     :undoc-members:
22 |     :show-inheritance:
23 | 
24 | 
25 | transitionMatrix.creditratings.predefined module
26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 | 
28 | .. automodule:: transitionMatrix.creditratings.predefined
29 |     :members:
30 |     :undoc-members:
31 |     :show-inheritance:
32 | 


--------------------------------------------------------------------------------
/docs/source/data_generators.rst:
--------------------------------------------------------------------------------
 1 | Data Generators
 2 | ===================
 3 | 
 4 | The transitionMatrix distribution includes a number of data generators to support testing / training objectives.
 5 | 
 6 | * **exponential_transitions**: Generate continuous time events from exponential distribution and uniform sampling from state space. Suitable for testing cohorting algorithms and duration based estimators.
 7 | * **markov_chain**: Generate discrete events from a markov chain matrix in Compact data format. Suitable for testing cohort based estimators
 8 | * **long_format**: Generate continuous events from a markov chain matrix in Long data format. Suitable for testing duration based estimators
 9 | * **portfolio_lables**: Generate a collection of credit rating states emulating a snapshot of portfolio data. Suitable for mappings and transformations of credit rating states
10 | 
11 | 
12 | .. note:: Do not confuse *data generators* with *matrix generators*
13 | 
14 | Data Generation Examples
15 | -------------------------
16 | 
17 | All data generation examples are in the script examples/python/generate_synthetic_data.py
18 | 


--------------------------------------------------------------------------------
/docs/source/withdrawn_ratings.rst:
--------------------------------------------------------------------------------
 1 | Withdrawn Ratings
 2 | ========================
 3 | 
 4 | Withdrawn ratings are a common issue that needs to be handled in the context of estimating transition matrices. See `right censoring issues <https://www.openriskmanual.org/wiki/Withdrawn_Ratings>`_
 5 | 
 6 | Adjust NR (Not Rated) States
 7 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 8 | 
 9 | Adjusting for NR states can be done via the :meth:`transitionMatrix.model.TransitionMatrix.remove` method.
10 | 
11 | 
12 | Single Period Matrix
13 | """"""""""""""""""""""""""""
14 | Example of using transitionMatrix to adjust the (not-rated) NR state. Input data are the Standard and Poor's historical data (1981 - 2016) for corporate credit rating migrations.
15 | 
16 | * Script: examples/python/adjust_nr_state.py
17 | 
18 | 
19 | Multi-period Matrix
20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21 | 
22 | * Script: examples/python/fix_multiperiod_matrix.py
23 | 
24 | Example of using transitionMatrix to detect and solve various pathologies that might be affecting transition matrix data
25 | 
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """
17 | run the transitionMatrix test suite
18 | 
19 | """
20 | 
import os
import sys
import unittest

from transitionMatrix import source_path

# Make the source tree importable by the discovered test modules
sys.path.append(source_path)

# Discover every test module under <source_path>/tests
loader = unittest.TestLoader()
start_dir = os.path.join(source_path, 'tests')
suite = loader.discover(start_dir)

runner = unittest.TextTestRunner()
result = runner.run(suite)

# Propagate the outcome to the caller: previously the result was discarded and the
# process always exited with status 0, even when tests failed or errored
sys.exit(0 if result.wasSuccessful() else 1)
34 | 


--------------------------------------------------------------------------------
/docs/source/datasets.rst:
--------------------------------------------------------------------------------
 1 | Datasets
 2 | ===================
 3 | 
 4 | The transitionMatrix distribution includes a number of datasets to support testing / training objectives. Datasets come in two main types:
 5 | 
 6 | * State Transition Data (used in estimation). There are both dummy (synthetic) examples and some actual data. Transition data are usually in CSV format.
 7 | * Transition Matrices and Multi-period Sets of matrices (again both dummy and actual examples). Transition matrices are usually in JSON format.
 8 | 
 9 | State Transition Data
10 | -------------------------------------------
11 | 
12 | The datasets are located in the datasets directory and the scripts that use them in examples/python. For testing purposes all examples can be run using the run_examples.py script located in the root directory. Some scripts have an example flag that selects alternative input data or estimators.
13 | 
14 | .. csv-table:: List of Transition Datasets
15 |    :header-rows: 1
16 |    :widths: 15 5 5 5 5 15 50
17 |    :file: ../../datasets/dataset_list.csv
18 | 
19 | 
20 | Transition Matrices
21 | --------------------------------------------
22 | 
23 | * generic_monthly
24 | * generic_multiperiod
25 | * JLT
26 | * sp 2017
27 | 
28 | 


--------------------------------------------------------------------------------
/datasets/JLT.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   [
 3 |     0.891,
 4 |     0.0963,
 5 |     0.0078,
 6 |     0.0019,
 7 |     0.003,
 8 |     0.0,
 9 |     0.0,
10 |     0.0
11 |   ],
12 |   [
13 |     0.0086,
14 |     0.901,
15 |     0.0747,
16 |     0.0099,
17 |     0.0029,
18 |     0.0029,
19 |     0.0,
20 |     0.0
21 |   ],
22 |   [
23 |     0.0009,
24 |     0.0291,
25 |     0.8894,
26 |     0.0649,
27 |     0.0101,
28 |     0.0045,
29 |     0.0,
30 |     0.0009
31 |   ],
32 |   [
33 |     0.0006,
34 |     0.0043,
35 |     0.0656,
36 |     0.8427,
37 |     0.0644,
38 |     0.016,
39 |     0.0018,
40 |     0.0045
41 |   ],
42 |   [
43 |     0.0004,
44 |     0.0022,
45 |     0.0079,
46 |     0.0719,
47 |     0.7764,
48 |     0.1043,
49 |     0.0127,
50 |     0.0241
51 |   ],
52 |   [
53 |     0.0,
54 |     0.0019,
55 |     0.0031,
56 |     0.0066,
57 |     0.0517,
58 |     0.8246,
59 |     0.0435,
60 |     0.0685
61 |   ],
62 |   [
63 |     0.0,
64 |     0.0,
65 |     0.0116,
66 |     0.0116,
67 |     0.0203,
68 |     0.0754,
69 |     0.6493,
70 |     0.2319
71 |   ],
72 |   [
73 |     0.0,
74 |     0.0,
75 |     0.0,
76 |     0.0,
77 |     0.0,
78 |     0.0,
79 |     0.0,
80 |     1.0
81 |   ]
82 | ]


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file for Sphinx projects
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version and other tools you might need
 8 | build:
 9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "3.10"
12 |     # You can also specify other tool versions:
13 |     # nodejs: "20"
14 |     # rust: "1.70"
15 |     # golang: "1.20"
16 | 
17 | # Build documentation in the "docs/" directory with Sphinx
18 | sphinx:
19 |   configuration: docs/source/conf.py
20 |   # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
21 |   # builder: "dirhtml"
22 |   # Fail on all warnings to avoid broken references
23 |   # fail_on_warning: true
24 | 
25 | # Optionally build your docs in additional formats such as PDF and ePub
26 | # formats:
27 | #   - pdf
28 | #   - epub
29 | 
30 | # Optional but recommended, declare the Python requirements required
31 | # to build your documentation
32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
33 | python:
34 |    install:
35 |      - requirements: requirements-dev.txt
36 | 


--------------------------------------------------------------------------------
/docs/source/credit_curves.rst:
--------------------------------------------------------------------------------
 1 | Credit Curves
 2 | ========================
 3 | 
 4 | A Credit Curve denotes a grouping of credit risk metrics (parameters) that estimate the likelihood that a legal entity experiences a Credit Event over different time periods (an increasing sequence of longer horizons). `See Credit Curves <https://www.openriskmanual.org/wiki/Category:Credit_Curve>`_
 5 | 
 6 | A multi-period matrix and a credit curve are closely related objects (under some circumstances the latter can be thought of as a subset of the former). The transitionMatrix package offers the following main functionality concerning credit curves:
 7 | 
 8 | * The :class:`transitionMatrix.creditratings.creditcurve.CreditCurve` class for storing and working with credit curves
 9 | * The :meth:`transitionMatrix.model.TransitionMatrixSet.default_curves` transitionMatrixSet method that extracts from a matrix set the default curve
10 | 
11 | 
12 | Example: Calculate and Plot Credit Curves
13 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14 | 
15 | Example of using transitionMatrix to calculate and visualize multi-period credit curves
16 | 
17 | * Script: examples/python/credit_curves.py
18 | 
19 | .. image:: ../../examples/credit_curves.png


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. transitionMatrix documentation master file
 2 | 
 3 | transitionMatrix Documentation
 4 | ===============================
 5 | 
 6 | .. image:: ../../examples/overview.png
 7 | 
 8 | transitionMatrix is a pure Python powered library for the statistical analysis and visualization of state transition phenomena. It can be used to analyze any dataset that captures *timestamped transitions in a discrete state space.*
 9 | 
10 | Use cases include applications in finance (for example credit rating transitions), IT (system state event logs) and more.
11 | 
12 | **NB: transitionMatrix is still in alpha release / active development. If you encounter issues please raise them in our github repository**
13 | 
14 | .. toctree::
15 |    :maxdepth: 2
16 |    :caption: Contents:
17 | 
18 |    description
19 |    getting_started
20 |    data_formats
21 |    datasets
22 |    preprocessing
23 |    credit_ratings
24 |    estimators
25 |    postprocessing
26 |    data_generators
27 |    federation
28 |    examples
29 |    modules
30 |    testing
31 |    roadmap
32 |    changelog
33 | 
34 | 
35 | Indexes and tables
36 | ==================
37 | 
38 | * :ref:`genindex`
39 | * :ref:`modindex`
40 | * :ref:`search`
41 | 


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.rst:
--------------------------------------------------------------------------------
 1 | transitionMatrix Package
 2 | ============================
 3 | 
 4 | The core module
 5 | 
 6 | .. automodule:: transitionMatrix.model
 7 |     :noindex:
 8 | 
 9 | transitionMatrix Classes
10 | ------------------------------
11 | 
12 | TransitionMatrix
13 | ~~~~~~~~~~~~~~~~~~~
14 | 
15 | .. autoclass:: transitionMatrix.model.TransitionMatrix
16 |    :members:
17 | 
18 |    .. automethod:: __new__
19 | 
20 | TransitionMatrixSet
21 | ~~~~~~~~~~~~~~~~~~~
22 | 
23 | .. autoclass:: transitionMatrix.model.TransitionMatrixSet
24 |    :members:
25 | 
26 |    .. automethod:: __init__
27 | 
28 |    .. automethod:: __mul__
29 | 
30 | 
31 | EmpiricalTransitionMatrix
32 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
33 | 
34 | .. todo:: This is future functionality
35 | 
36 | .. autoclass:: transitionMatrix.model.EmpiricalTransitionMatrix
37 |    :members:
38 | 
39 |    .. automethod:: __init__
40 | 
41 | 
42 | transitionMatrix Subpackages
43 | =============================
44 | 
45 | .. toctree::
46 | 
47 |     transitionMatrix.estimators
48 |     transitionMatrix.statespaces
49 |     transitionMatrix.creditratings
50 |     transitionMatrix.generators
51 |     transitionMatrix.visualization
52 |     transitionMatrix.utils
53 | 


--------------------------------------------------------------------------------
/docs/source/example_with_jlt.rst:
--------------------------------------------------------------------------------
 1 | Working with an actual matrix
 2 | ==============================
 3 | 
 4 | The core capability of transitionMatrix is to produce estimated matrices but getting a realistic example requires quite some work. In this section we assume we have estimated one.
 5 | 
 6 | Let's look at a realistic example from the JLT paper
 7 | 
 8 | .. code::
 9 | 
10 |     # Reproduce JLT Generator
11 |     # We load it using different sources
12 |     E = tm.TransitionMatrix(values=JLT)
13 |     E_2 = tm.TransitionMatrix(json_file=dataset_path + "JLT.json")
14 |     E_3 = tm.TransitionMatrix(csv_file=dataset_path + "JLT.csv")
15 |     # Lets check there are no errors
16 |     Error = E - E_3
17 |     print(np.linalg.norm(Error))
18 |     # Let's look at validation and generators
19 |     # Empirical matrices will not satisfy constraints exactly
20 |     print(E.validate(accuracy=1e-3))
21 |     print(E.characterize())
22 |     print(E.generator())
23 |     Error = E - expm(E.generator())
24 |     # Frobenius norm
25 |     print(np.linalg.norm(Error))
26 |     # L1 norm
27 |     print(np.linalg.norm(Error, 1))
28 |     # Use pandas style API for saving to files
29 |     E.to_csv("JLT.csv")
30 |     E.to_json("JLT.json")
31 | 
32 | 


--------------------------------------------------------------------------------
/docs/source/cohort_estimator.rst:
--------------------------------------------------------------------------------
 1 | Cohort Estimator
 2 | ========================
 3 | 
 4 | A cohort estimator (more accurately a discrete-time estimator) is a class of estimators of multi-state transitions that is a simpler alternative to Duration type estimators
 5 | 
 6 | 
 7 | Estimate a Transition Matrix from Cohort Data
 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 9 | 
10 | Example workflows using transitionMatrix to estimate a transition matrix from data that are already grouped in cohorts
11 | 
12 | * Script: examples/python/matrix_from_cohort_data.py
13 | * Example ID: 3
14 | 
15 | 
16 | .. code::
17 | 
18 |     data = pd.read_csv(dataset_path + 'synthetic_data6.csv', dtype={'State': str})
19 |     sorted_data = data.sort_values(['ID', 'Timestep'], ascending=[True, True])
20 |     myState = tm.StateSpace()
21 |     myState.generic(2)
22 |     print(myState.validate_dataset(dataset=sorted_data))
23 |     myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
24 |     result = myEstimator.fit(sorted_data)
25 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
26 | 
27 |     myEstimator.print(select='Counts', period=0)
28 |     myEstimator.print(select='Frequencies', period=18)


--------------------------------------------------------------------------------
/docs/source/transitionMatrix.estimators.rst:
--------------------------------------------------------------------------------
 1 | Estimators SubPackage
 2 | ======================================
 3 | 
 4 | This subpackage implements the various estimators
 5 | 
 6 | transitionMatrix.estimators.simple\_estimator module
 7 | ----------------------------------------------------
 8 | 
 9 | .. automodule:: transitionMatrix.estimators.simple_estimator
10 |     :members:
11 |     :undoc-members:
12 |     :show-inheritance:
13 | 
14 | 
15 | transitionMatrix.estimators.cohort\_estimator module
16 | ----------------------------------------------------
17 | 
18 | .. automodule:: transitionMatrix.estimators.cohort_estimator
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | transitionMatrix.estimators.aalen\_johansen\_estimator module
25 | -------------------------------------------------------------
26 | 
27 | .. automodule:: transitionMatrix.estimators.aalen_johansen_estimator
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 
32 | transitionMatrix.estimators.kaplan\_meier\_estimator module
33 | -----------------------------------------------------------
34 | 
35 | .. todo:: This is future functionality
36 | 
37 | .. automodule:: transitionMatrix.estimators.kaplan_meier_estimator
38 |     :members:
39 |     :undoc-members:
40 |     :show-inheritance:
41 | 
42 | 


--------------------------------------------------------------------------------
/datasets/dataset_list.csv:
--------------------------------------------------------------------------------
 1 | File,Format,Events,Entities,States,Generator,Description
 2 | rating_data_raw.csv,Compact,4000,1829,9,Extract,A typical credit rating dataset
 3 | rating_data.csv,Compact,3780,1642,9,Data cleaning script,A typical credit rating dataset
 4 | scenario_data.csv,Compact,550,50,5,,
 5 | synthetic_data.csv,Compact,100,10,2,,
 6 | synthetic_data1.csv,Compact,100,1,4,Generator(=1),DURATION TYPE DATASETS (Compact format)
 7 | synthetic_data2.csv,Compact,10000,1000,2,Generator(=2),DURATION TYPE DATASETS (Compact format)
 8 | synthetic_data3.csv,Compact,2000,100,7,Generator(=3),DURATION TYPE DATASETS (Compact format)
 9 | synthetic_data4.csv,Compact,10000,1000,8,Generator(=4),Cohort type dataset (Generic Rating Matrix). Offers a semi-realistic example
10 | synthetic_data5.csv,Compact,50000,10000,3,Generator(=5),Large cohort type dataset useful for testing convergence
11 | synthetic_data6.csv,Compact,20000,1000,2,Generator(=6),COHORT TYPE DATASETS
12 | synthetic_data7.csv,Canonical,1295,1000,8,Generator(=7),Duration type datasets in Long Format
13 | synthetic_data8.csv,Canonical,10000,10000,2,Generator(=8),Duration type datasets in Long Format
14 | synthetic_data9.csv,Canonical,1338,1000,8,Generator(=9),Duration type datasets in Long Format
15 | synthetic_data10.csv,Canonical,12000,2000,9,Generator(=10),Credit Rating Migrations in Long Format / Compact Form
16 | test.csv,Compact,14,7,3,,
17 | 


--------------------------------------------------------------------------------
/tests/test_datasets.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import unittest
17 | 
18 | import transitionMatrix as tm
19 | from transitionMatrix import dataset_path
20 | from transitionMatrix.creditratings.predefined import Minimal
21 | 
22 | ACCURATE_DIGITS = 7
23 | 
24 | 
class TestDatasets(unittest.TestCase):
    """
    Load a predefined matrix and a matrix set stored in a dataset file
    """

    def test_minimal_matrix(self):
        """ The predefined Minimal matrix loads and reports dimension 3"""
        matrix = tm.TransitionMatrix(values=Minimal)
        matrix.validate()
        self.assertEqual(matrix.dimension, 3)

    def test_matrix_set_load_csv(self):
        """ The S&P cumulative matrix set loads from CSV with the expected periods"""
        csv_path = dataset_path + "sp_1981-2016.csv"
        expected_periods = [1, 2, 3, 5, 7, 10, 15, 20]
        matrix_set = tm.TransitionMatrixSet(csv_file=csv_path, temporal_type='Cumulative')
        matrix_set.validate()
        self.assertEqual(matrix_set.periods, expected_periods)
40 | 
41 | if __name__ == "__main__":
42 |     unittest.main()
43 | 


--------------------------------------------------------------------------------
/docs/source/multi-period_transitions.rst:
--------------------------------------------------------------------------------
 1 | Multi-Period Transitions
 2 | ========================
 3 | 
 4 | The transitionMatrix package adopts a *multi-period paradigm* that is more general than a Markov-Chain framework that imposes the Markov assumption over successive periods. In this direction, the **TransitionMatrixSet object** stores a family of TransitionMatrix objects as a time ordered list. Besides basic storage this structure allows a variety of simultaneous operations on the collection of related matrices
 5 | 
 6 | There are two basic representations of a multi-period set of transitions:
 7 | 
 8 | - The first (*cumulative form*) is the most fundamental. Each successive (k-th) element stores transition rates from an initial time to timepoint k. This could be for example the input of an empirical transition matrix dataset
 9 | - In the second (*incremental form*) successive elements store transition rates from timepoint k-1 to timepoint k.
10 | 
11 | The TransitionMatrixSet class allows converting between the two representations
12 | 
13 | 
14 | Matrix *Set* Operations
15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16 | 
17 | * Script: matrix_set_operations.py
18 | 
19 | Contains examples using transitionMatrix to perform various transition matrix **set** operations (Multi-period measurement context)
20 | 
21 | 
22 | Default Curves
23 | --------------
24 | 
25 | Absorbing states (in a credit risk context, a borrower default) are particularly important, therefore some specific functionality is provided to isolate the corresponding default rate *curve*. (See also the CreditCurve object)
26 | 
27 | 


--------------------------------------------------------------------------------
/docs/source/examples.rst:
--------------------------------------------------------------------------------
 1 | Usage Examples
 2 | ======================
 3 | 
 4 | The examples directory includes both **standalone python scripts** and **jupyter notebooks** to help you get started. (NB: Currently there are more scripts than notebooks).
 5 | 
 6 | A selection of topics covered:
 7 | 
 8 | - Generating transition matrices from data (using various estimators)
 9 | - Manipulating transition matrices
10 | - Computing and visualizing credit curves corresponding to a set of transition matrices
11 | - Mapping rating states between different rating systems
12 | 
13 | Python Scripts
14 | -------------------------------------------
15 | 
16 | The scripts are located in examples/python. For testing purposes all examples can be run using the run_examples.py script located in the root directory. Some scripts have an example flag that selects alternative input data or estimators.
17 | 
18 | .. csv-table:: List of Example Scripts
19 |    :header-rows: 1
20 |    :widths: 20 5 20 55
21 |    :file: ../../examples/python/example_list.csv
22 | 
23 | 
24 | Jupyter Notebooks
25 | -------------------------------------------
26 | 
27 | * Adjust_NotRated_State.ipynb
28 | * Matrix_Operations.ipynb
29 | * Monthly_from_Annual.ipynb
30 | 
31 | Open Risk Academy Scripts
32 | -------------------------------------------
33 | 
34 | Additional examples are available in the Open Risk Academy course `Analysis of Credit Migration using Python TransitionMatrix <https://www.openriskacademy.com/course/management.php?categoryid=26&courseid=38>`_. The scripts developed in the course are `available here <https://github.com/open-risk/Academy-Course-PYT26038>`_
35 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | import pandas as pd
18 | 
19 | import transitionMatrix as tm
20 | from transitionMatrix import source_path
21 | 
22 | ACCURATE_DIGITS = 7
23 | 
24 | 
class TestPreprocessing(unittest.TestCase):
    """ Tests of the preprocessing utilities"""

    def test_bin_timestamps(self):
        """ Binning events into cohorts must preserve the total number of events"""

        dataset_path = source_path + "datasets/"
        events = pd.read_csv(dataset_path + 'synthetic_data1.csv')

        # number of events present in the raw input
        raw_count = events['ID'].count()

        # the cohort intervals returned alongside the data are not needed here
        cohort_data, _ = tm.utils.bin_timestamps(events, cohorts=5, remove_stale=False)

        # number of events after grouping into cohorts
        cohort_count = cohort_data['Count'].astype(int).sum()

        self.assertEqual(raw_count, cohort_count)
36 | 
37 | 
class TestDataSetGenerators(unittest.TestCase):
    """ Placeholder test case: no tests are defined here yet"""
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     unittest.main()
44 | 


--------------------------------------------------------------------------------
/transitionMatrix/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """ This module contains various helper classes and functions that do not fit
16 | into any of the main modules of the library
17 | 
18 | """
19 | 
20 | from __future__ import print_function, division
21 | 
22 | from .preprocessing import *
23 | from .converters import *
24 | 
25 | 
def print_matrix(A, format_type='Standard', accuracy=2):
    """ Pretty print a matrix

    Each row of the matrix is printed on its own line, every element followed by
    a single space. An empty line is printed after the last row.

    :param A: the matrix to print; must expose ``shape`` and support ``A[i, j]`` indexing
    :param format_type: formatting options (Standard, Percent). With any other value
        no elements are printed (only the line breaks)
    :type format_type: str
    :param accuracy: number of decimals to display (applies to both format types)
    :type accuracy: int

    """
    # Build the element template once. The Percent format previously hard-coded
    # two decimals and ignored ``accuracy``; it now honours it (the default of 2
    # reproduces the earlier output).
    if format_type == 'Standard':
        template = "{0:." + str(accuracy) + "f}"
    elif format_type == 'Percent':
        template = "{0:." + str(accuracy) + "f}%"
    else:
        template = None
    as_percent = format_type == 'Percent'

    for s_in in range(A.shape[0]):
        if template is not None:
            for s_out in range(A.shape[1]):
                value = A[s_in, s_out]
                if as_percent:
                    value = 100 * value
                print(template.format(value) + ' ', end='')
        print('')
    print('')
44 | 


--------------------------------------------------------------------------------
/examples/python/characterize_datasets.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """ Characterize datasets (Summary statistics etc)
17 | 
18 | """
19 | 
20 | import pprint as pp
21 | 
22 | import pandas as pd
23 | 
24 | from transitionMatrix import source_path
25 | from transitionMatrix.utils import transitions_summary
26 | 
# Directory holding the datasets, built from the package source path
dataset_path = source_path + "datasets/"

# File names (inside dataset_path) of the datasets to summarize
dataset_list = [
    'rating_data_raw.csv',
    'rating_data.csv',
    'scenario_data.csv',
    'synthetic_data.csv',
    'synthetic_data1.csv',
    'synthetic_data2.csv',
    'synthetic_data3.csv',
    'synthetic_data4.csv',
    'synthetic_data5.csv',
    'synthetic_data6.csv',
    'synthetic_data7.csv',
    'synthetic_data8.csv',
    'synthetic_data9.csv',
    'test.csv'
]

for dataset in dataset_list:
    # Read through dataset_path (previously defined but unused) instead of a path
    # relative to the working directory, so the script works wherever it is run from
    input_data = pd.read_csv(dataset_path + dataset)
    print(dataset)
    pp.pprint(transitions_summary(input_data))
    print(80 * '-')


def main():
    """ Report completion; the summaries themselves are printed at module level above"""
    print("Done")


if __name__ == "__main__":
    main()
59 | 


--------------------------------------------------------------------------------
/docs/source/visualization.rst:
--------------------------------------------------------------------------------
 1 | Visualization
 2 | ===============
 3 | 
 4 | transitionMatrix aims to support native (Python-based) visualization of various transition related datasets using matplotlib and other native python visualization libraries.
 5 | 
 6 | .. note:: The visualization functionality is not yet refactored into a reusable API. For now the visualization functionality is implemented separately as a demo script.
 7 | 
 8 | 
 9 | Visualization Examples
10 | ----------------------
11 | 
12 | Example workflows using transitionMatrix to generate visualizations of migration phenomena
13 | 
14 | * Script: examples/python/generate_visuals.py
15 | 
16 | Example 1
17 | """"""""""""""""""""""""""""
18 | Plotting the state space trajectory of a single entity
19 | 
20 | .. image:: ../../examples/single_entity.png
21 | 
22 | Example 2
23 | """"""""""""""""""""""""""""
24 | Plotting the state space trajectory of multiple entities
25 | 
26 | .. image:: ../../examples/sampled_histories.png
27 | 
28 | Example 3
29 | """"""""""""""""""""""""""""
30 | Histogram plot of transition frequencies
31 | 
32 | .. image:: ../../examples/estimation.png
33 | 
34 | Example 4
35 | """"""""""""""""""""""""""""
36 | Colored scatterplot of entity transitions over time
37 | 
38 | .. image:: ../../examples/scatterplot.png
39 | 
40 | Example 5
41 | """"""""""""""""""""""""""""
42 | Colored scatterplot of entity transitions over time (alternative form)
43 | 
44 | .. image:: ../../examples/scatterplot2.png
45 | 
46 | Example 6
47 | """"""""""""""""""""""""""""
48 | Visualize a transition matrix using a Hinton-style visual
49 | 
50 | .. image:: ../../examples/TransitionMatrix.png
51 | 
52 | Example 7
53 | """"""""""""""""""""""""""""
54 | Visualize a transition matrix using a sankey visual (a logarithmic adaptation that is useful for qualitative insight)
55 | 
56 | .. image:: ../../examples/sankey.png
57 | 


--------------------------------------------------------------------------------
/run_examples.py:
--------------------------------------------------------------------------------
 1 | # (c) 2017-2024 Open Risk, all rights reserved
 2 | #
 3 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 4 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 5 | # third-party software included in this distribution. You may not use this file except in
 6 | # compliance with the License.
 7 | #
 8 | # Unless required by applicable law or agreed to in writing, software distributed under
 9 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
10 | # either express or implied. See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | 
14 | """ Run all examples for a high-level test that everything is working with the current version of the library
15 | 
16 | """
17 | 
import os
import runpy
import traceback

examples_path = os.path.join("examples", "python")
filelist = ['adjust_nr_state', 'credit_curves',
            'empirical_transition_matrix', 'fix_multiperiod_matrix', 'generate_synthetic_data',
            'generate_visuals', 'matrix_from_cohort_data', 'matrix_operations', 'matrix_set_operations']

# TODO additional examples
# 'matrix_from_duration_data', 'matrix_lendingclub', 'matrix_set_lendingclub',

if __name__ == '__main__':

    failed = []  # names of the examples that raised

    for example in filelist:
        print('\nExecuting example file: ', example.upper())
        print('-----------------------' + '-' * len(example))
        path = os.path.join(examples_path, example + ".py")
        try:
            # Run the script as if it had been invoked directly, in a fresh namespace,
            # so that one example cannot overwrite the names used by this runner
            # (the earlier exec() shared globals and never closed the file it opened)
            runpy.run_path(path, run_name='__main__')
        except (Exception, SystemExit):
            # Best effort: report the failure and continue with the next example.
            # KeyboardInterrupt is deliberately not caught so Ctrl-C stops the run.
            print('**********************' + '*' * len(example))
            print('ERROR in example file', example)
            print('**********************' + '*' * len(example))
            # Show what actually went wrong instead of silently discarding it
            traceback.print_exc()
            failed.append(example)

    if failed:
        print('\nExamples with errors: ' + ', '.join(failed))
41 | 


--------------------------------------------------------------------------------
/docs/source/testing.rst:
--------------------------------------------------------------------------------
 1 | Testing
 2 | ==================
 3 | 
 4 | Testing transitionMatrix has two major components:
 5 | 
 6 | * normal code testing aiming to certify the correctness of code execution
 7 | * algorithm testing aiming to validate the correctness of algorithmic implementation
 8 | 
 9 | .. note:: In general algorithmic testing is not as precise as code testing and may be more subject to uncertainties such as numerical accuracy. To make those tests as revealing as possible transitionMatrix implements a number of standardized *round-trip tests*:
10 | 
11 |   * starting with a matrix
12 |   * generating compatible data
13 |   * estimate a matrix from the data
14 |   * comparing the values of input and estimated matrices
15 | 
16 | Running all the examples
17 | ------------------------
18 | Running all the examples is a quick way to check that everything is installed properly, all paths are defined etc. At the root of the distribution:
19 | 
20 | .. code:: bash
21 | 
22 |     python3 run_examples.py
23 | 
24 | 
25 | The file simply iterates and executes a standalone list of :ref:`Usage Examples`.
26 | 
27 | .. code:: python
28 | 
29 |     filelist = ['adjust_nr_state', 'credit_curves', 'empirical_transition_matrix', 'fix_multiperiod_matrix', 'generate_synthetic_data', 'generate_visuals', 'matrix_from_cohort_data', 'matrix_from_duration_data', 'matrix_lendingclub', 'matrix_set_lendingclub', 'matrix_operations', 'matrix_set_operations']
30 | 
31 | .. warning:: The script might generate a number of files / images at random places within the distribution
32 | 
33 | 
34 | Test Suite
35 | -------------
36 | The testing framework is based on unittest. Before you get started and depending on how you obtained / installed the library check:
37 | 
38 | - If required adjust the source directory path in transitionMatrix/__init__
39 | - Unzip the data files in the datasets directory
40 | 
41 | Then run all tests
42 | 
43 | .. code:: bash
44 | 
45 |     python3 test.py
46 | 
47 | For an individual test:
48 | 
49 | .. code:: bash
50 | 
51 |     pytest tests/test_TESTNAME.py
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/examples/python/deterministic_paths.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """
17 | Create deterministic transitions
18 | 
19 | """
20 | 
21 | import pandas as pd
22 | 
23 | import transitionMatrix as tm
24 | from transitionMatrix.estimators import cohort_estimator as es
25 | from transitionMatrix.generators import dataset_generators
26 | from transitionMatrix.utils.converters import datetime_to_float, to_compact
27 | 
28 | sequences = [[(0.0, 0), (0.5, 1), (1.0, 2)],
29 |              [(0.0, 1), (0.3, 0), (0.8, 1)],
30 |              [(0.0, 2), (0.2, 1), (0.7, 2)]]
31 | 
32 | replication_count = 10  # passed to dataset_generators.deterministic together with the sequences
33 | # State space with three states labelled A, B and C
34 | definition = [('0', "A"), ('1', "B"), ('2', "C")]
35 | myState = tm.StateSpace(definition)
36 | 
37 | # Build the dataset from the fixed sequences (presumably (time, state) pairs), print it, sort by ID and Time
38 | input_data = dataset_generators.deterministic(sequences, replication_count)
39 | print(input_data)
40 | sorted_data = input_data.sort_values(['ID', 'Time'], ascending=[True, True])
41 | cohort_data, cohort_bounds = tm.utils.bin_timestamps(sorted_data, cohorts=100)  # bin the timestamps using 100 cohorts
42 | print(80*'=')
43 | print(cohort_data)
44 | myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds, ci={'method': 'goodman', 'alpha': 0.05})
45 | result = myEstimator.fit(cohort_data, labels={'Time': 'Time', 'State': 'State', 'ID': 'ID'})
46 | myMatrix = tm.TransitionMatrix(myEstimator.average_matrix)  # built from the estimator's average_matrix attribute
47 | myEstimator.print(select='Counts')
48 | myMatrix.print_matrix(accuracy=3)


--------------------------------------------------------------------------------
/tests/test_duration_estimator.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | import pandas as pd
18 | 
19 | import transitionMatrix as tm
20 | from transitionMatrix import source_path
21 | from transitionMatrix.estimators import aalen_johansen_estimator as aj
22 | 
23 | ACCURATE_DIGITS = 2
24 | 
25 | 
26 | class TestAalenJohansenEstimator(unittest.TestCase):
27 |     """
28 |     Test the estimation of a simple 2x2 transition matrix with absorbing state
29 | 
30 |     .. note: The result is subject to sampling error! Ensure the required accuracy corresponds to the input data size
31 | 
32 |     """
33 | 
34 |     def test_aalenjohansen_simple_transitions(self):
35 |         dataset_path = source_path + "datasets/"
36 |         data = pd.read_csv(dataset_path + 'synthetic_data8.csv')
37 |         sorted_data = data.sort_values(['Time', 'ID'], ascending=[True, True])
38 |         definition = [('0', "G"), ('1', "B")]
39 |         myState = tm.StateSpace(definition)
40 |         myEstimator = aj.AalenJohansenEstimator(states=myState)
41 |         labels = {'Time': 'Time', 'From': 'From', 'To': 'To', 'ID': 'ID'}
42 |         result, times = myEstimator.fit(sorted_data, labels=labels)
43 |         self.assertAlmostEqual(result[0, 0, -1], 0.5, places=ACCURATE_DIGITS, msg=None, delta=None)
44 |         self.assertAlmostEqual(result[0, 1, -1], 0.5, places=ACCURATE_DIGITS, msg=None, delta=None)
45 |         self.assertEqual(result[1, 0, -1], 0.0)
46 |         self.assertEqual(result[1, 1, -1], 1.0)
47 | 


--------------------------------------------------------------------------------
/tests/test_nr_transform.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | import logging
17 | import sys
18 | 
19 | import transitionMatrix as tm
20 | from transitionMatrix import dataset_path
21 | from transitionMatrix.model import TransitionMatrix
22 | from transitionMatrix.creditratings.predefined import SP02, SP02NR
23 | 
24 | # ACCURATE_DIGITS = 7
25 | ACCURATE_DIGITS = 2
26 | 
27 | 
28 | class TestNRTransform(unittest.TestCase):
29 |     """
30 |     1. Load in-memory matrices
31 |     2. Perform NR transformation
32 |     3. Test with S&P Result
33 | 
34 |     .. todo:: SnP result rounding seems large
35 | 
36 |     """
37 | 
38 |     def test_nr_matrix_load(self):
39 |         a = TransitionMatrix(values=SP02NR)
40 |         # messages = a.validate()
41 |         # log = logging.getLogger("Test.testNR")
42 |         # log.debug("messages= %r", messages)
43 |         # self.assertTrue(messages)
44 |         self.assertEqual(a.shape[0], a.shape[1])
45 |         self.assertEqual(a.dimension, 9)
46 | 
47 |     def test_nr_remove(self):
48 |         a = TransitionMatrix(values=SP02NR)
49 |         b = TransitionMatrix(values=SP02)
50 |         a = 0.01 * a
51 |         b = 0.01 * b
52 |         a = a.remove(8, method='noninform')
53 |         for i in range(a.dimension):
54 |             for j in range(a.dimension):
55 |                 self.assertAlmostEqual(a[i, j], b[i, j], places=ACCURATE_DIGITS)
56 | 
57 | 
58 | if __name__ == "__main__":  # executed only when the file is run as a script
59 |     logging.basicConfig(stream=sys.stderr)  # send log records to stderr
60 |     logging.getLogger("Test.testNR").setLevel(logging.DEBUG)  # enable DEBUG output on the "Test.testNR" logger
61 |     unittest.main()
62 | 


--------------------------------------------------------------------------------
/examples/python/state_space_operations.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """
17 | Examples using transitionMatrix to perform various state space operations
18 | 
19 | """
20 | 
21 | from transitionMatrix.creditratings.creditsystems import SnP_Fitch2Moodys, Moodys2DBRS, SnP_SS
22 | from transitionMatrix.generators import dataset_generators as dg
23 | 
24 | print("Some Basics")
25 | print(80 * "=")
26 | # Let us load a credit rating scale
27 | myState = SnP_SS
28 | # Print the states
29 | print("The States of our starting scale: ", myState.get_states())
30 | # Print the state labels
31 | print("The State Labels: ", myState.get_state_labels())
32 | # Print the complete definition
33 | print("The Full Description: ", myState.definition)
34 | 
35 | # Convert SnP ratings to Moody's and DBRS
36 | # Escape R (regulatory default) and SD (selective default)
37 | print("")
38 | print("Convert labels to other rating scales scales")
39 | print(80 * "=")
40 | for state in myState.get_state_labels():
41 |     if state not in ['R', 'SD/D']:
42 |         print(state, ' ----> (', SnP_Fitch2Moodys[state], Moodys2DBRS[SnP_Fitch2Moodys[state]], ')')
43 | 
44 | print("")
45 | print("Convert data to other scales")
46 | print(80 * "=")
47 | print("Input S&P Labels: ")
48 | # Generate some portfolio data and map to CQS
49 | portfolio = dg.portfolio_labels(myState, 100)
50 | print(portfolio)
51 | print("")
52 | print("Output CQS Labels: ")
53 | mapped_portfolio = []
54 | for label in portfolio:
55 |     mapped_portfolio.append(myState.cqs_map(label))
56 | print(mapped_portfolio)
57 | 
58 | 
59 | def main():  # only prints a completion message; the example statements run at import time
60 |     print("Done")
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     main()
65 | 


--------------------------------------------------------------------------------
/examples/python/compare_estimators.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """
17 | Example workflow using transitionMatrix to compare the Cohort and Aalen-Johansen estimators
18 | on the same cohort type dataset (Generic Rating Matrix). Offers a semi-realistic example
19 | 
20 | """
21 | 
22 | import pandas as pd
23 | 
24 | import transitionMatrix as tm
25 | from transitionMatrix import source_path
26 | from transitionMatrix.estimators.aalen_johansen_estimator import AalenJohansenEstimator
27 | from transitionMatrix.estimators.cohort_estimator import CohortEstimator
28 | from transitionMatrix.statespaces.statespace import StateSpace
29 | from transitionMatrix.utils.converters import to_canonical
30 | from transitionMatrix.utils.preprocessing import unique_timestamps
31 | 
32 | dataset_path = source_path + "datasets/"
33 | data = pd.read_csv(dataset_path + 'synthetic_data4.csv', dtype={'State': str})  # read the State column as strings
34 | myState = StateSpace(transition_data=data)  # state space constructed from the loaded data
35 | cohort_bounds = unique_timestamps(data)
36 | 
37 | # Estimate matrices using the Cohort estimator
38 | myEstimator = CohortEstimator(states=myState, cohort_bounds=cohort_bounds, ci={'method': 'goodman', 'alpha': 0.05})
39 | result = myEstimator.fit(data)
40 | myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
41 | myMatrixSet.cumulate()  # return value unused - presumably converts the set in place (TODO confirm)
42 | myMatrixSet.print_matrix(period=8)
43 | 
44 | # Estimate matrices using the Aalen-Johansen estimator
45 | canonical_data = to_canonical(data)
46 | myEstimator2 = AalenJohansenEstimator(states=myState)
47 | etm, times = myEstimator2.fit(canonical_data)
48 | myMatrix2 = tm.TransitionMatrix(etm[:, :, -1])  # keep only the slice at the last index of the third axis
49 | print('Cumulative Empirical Matrix')
50 | myMatrix2.print_matrix()
51 | 
52 | 
53 | def main():  # only prints a completion message; the example statements run at import time
54 |     print("Done")
55 | 
56 | 
57 | if __name__ == "__main__":
58 |     main()
59 | 


--------------------------------------------------------------------------------
/examples/python/adjust_nr_state.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """ Examples of using transitionMatrix to adjust the NR (not-rated) statistics.
17 | 
18 | Input data are the Standard and Poor's historical data (1981 - 2016) for corporate credit rating migrations
19 | 
20 | """
21 | 
22 | import transitionMatrix as tm
23 | from transitionMatrix import source_path
24 | from transitionMatrix.creditratings.predefined import SP02, SP02NR
25 | from transitionMatrix.utils import print_matrix
26 | 
27 | dataset_path = source_path + "datasets/"
28 | # Hard-coded switch: selects which of the two examples below is executed
29 | example = 1
30 | 
31 | if example == 1:
32 |     a = tm.TransitionMatrix(values=SP02NR)
33 |     b = tm.TransitionMatrix(values=SP02)
34 |     a = 0.01 * a  # both matrices are scaled by 0.01
35 |     b = 0.01 * b
36 |     a = a.remove(8, method='noninform')  # remove state index 8 (the NR state, see the message in example 2)
37 |     print_matrix(a, format_type='Standard', accuracy=5)
38 |     print_matrix(b, format_type='Standard', accuracy=5)
39 | 
40 | 
41 | elif example == 2:
42 | 
43 |     print("> Load multi-period transitional matrices (cumulative mode) from json file")
44 |     SnP_Set0 = tm.TransitionMatrixSet(json_file=dataset_path + "sp_1981-2016.json", temporal_type='Cumulative')
45 |     print("> Valid Input Matrix? ", SnP_Set0.validate())
46 | 
47 |     print("> Remove NR transitions and redistribute to other states")
48 |     SnP_Set1 = SnP_Set0.remove(8, "noninform")
49 |     print("> Valid Output Matrix? ", SnP_Set1.validate())
50 | 
51 |     #
52 |     # Hurrah, we have an NR adjusted matrix set. Lets save it
53 |     #
54 |     SnP_Set1.to_json(dataset_path + 'sp_NR_adjusted.json', accuracy=5)  # written into the datasets directory
55 | 
56 |     # Compare before / after
57 |     SnP_Set0.print_matrix(period=2)
58 |     SnP_Set1.print_matrix(period=2)
59 | 
60 | 
61 | def main():  # only prints a completion message; the example statements run at import time
62 |     print("Done")
63 | 
64 | 
65 | if __name__ == "__main__":
66 |     main()
67 | 


--------------------------------------------------------------------------------
/examples/python/matrix_set_operations.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """ Examples using transitionMatrix to perform operations with transition matrix sets sequences
17 | 
18 | """
19 | 
20 | import transitionMatrix as tm
21 | from transitionMatrix.creditratings.predefined import Generic as T1
22 | 
23 | print("-- Lets seed the set with a 3x3 matrix")
24 | A = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]])  # every row sums to 1.0
25 | print(A)
26 | 
27 | print("-- Identical future period transitions in incremental mode")
28 | A_Set = tm.TransitionMatrixSet(values=A, periods=3, method='Copy', temporal_type='Incremental')
29 | print(A_Set.entries)
30 | 
31 | print("-- Identical future period transitions in cumulative mode using the power method")
32 | B_Set = tm.TransitionMatrixSet(values=A, periods=3, method='Power', temporal_type='Cumulative')
33 | print(B_Set.entries)
34 | 
35 | print("-- Lets instantiate the set directly using a list of matrices")
36 | C_Vals = [[[0.75, 0.25], [0.0, 1.0]], [[0.75, 0.25], [0.0, 1.0]]]  # two identical 2x2 matrices
37 | C_Set = tm.TransitionMatrixSet(values=C_Vals, temporal_type='Incremental')
38 | print(C_Set.entries)
39 | 
40 | print("-- Validate the constructed sets")
41 | A_Set.validate()  # NOTE: the validate() results are discarded here (not printed)
42 | B_Set.validate()
43 | C_Set.validate()
44 | 
45 | print("-- Convert to Cumulative")
46 | A_Set.cumulate()  # return value unused - presumably converts A_Set in place (TODO confirm)
47 | print(A_Set.entries)
48 | A_Set.validate()
49 | 
50 | print("-- Convert back to Incremental")
51 | A_Set.incremental()  # return value unused - presumably converts A_Set in place (TODO confirm)
52 | print(A_Set.entries)
53 | A_Set.validate()
54 | 
55 | print("-- Create a multiperiod matrix set and save to json file")
56 | T_Set = tm.TransitionMatrixSet(values=T1, periods=10, method='Power', temporal_type='Cumulative')
57 | T_Set.to_json('Tn.json')  # relative file name: no directory is prepended
58 | 
59 | 
60 | def main():  # only prints a completion message; the example statements run at import time
61 |     print("Done")
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     main()
66 | 


--------------------------------------------------------------------------------
/examples/python/matrix_set_lendingclub.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """
17 | Example workflow using transitionMatrix to estimate a set of matrices from LendingClub data
18 | Input data are in a special cohort format as the published datasets have some limitations
19 | 
20 | """
21 | 
22 | import pandas as pd
23 | 
24 | import transitionMatrix as tm
25 | from transitionMatrix import source_path
26 | from transitionMatrix.estimators import simple_estimator as es
27 | 
28 | dataset_path = source_path + "datasets/"
29 | 
30 | # Example: LendingClub Style Migration Matrix Set
31 | # Load historical data into pandas frame
32 | # Format:
33 | # Expected Data Format is (ID, State_IN, State_OUT)
34 | 
35 | definition = [('A', "Grade A"), ('B', "Grade B"), ('C', "Grade C"),
36 |               ('D', "Grade D"), ('E', "Grade E"), ('F', "Grade F"),
37 |               ('G', "Grade G"), ('H', "Delinquent"), ('I', "Charged Off"),
38 |               ('J', "Repaid")]
39 | myState = tm.StateSpace(definition)
40 | 
41 | # Load the data sets into a pandas frame in sequence
42 | # Check matrix_lendingclub.py for comments
43 | 
44 | matrix_set = []
45 | for letter in ['a', 'b', 'c', 'd']:
46 |     # store the derived one-period matrices in a list
47 |     data = pd.read_csv(dataset_path + 'LoanStats3' + letter + '_Step2.csv')
48 |     myEstimator = es.SimpleEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
49 |     result = myEstimator.fit(data)
50 |     myEstimator.summary()
51 |     myMatrix = tm.TransitionMatrix(result)
52 |     myMatrix[7, 9] = 1.0
53 |     myMatrix[8, 9] = 1.0
54 |     myMatrix[9, 9] = 1.0
55 |     matrix_set.append(myMatrix)
56 | 
57 | # collect all matrices in a matrix set
58 | LC_Set = tm.TransitionMatrixSet(values=matrix_set, temporal_type='Incremental')
59 | LC_Set.print_matrix()
60 | 


--------------------------------------------------------------------------------
/docs/source/predefined_rating_scales.rst:
--------------------------------------------------------------------------------
 1 | Predefined Rating Scales
 2 | ========================
 3 | 
 4 | The transitionMatrix package supports a variety of credit rating scales. They are grouped together in :mod:`transitionMatrix.creditratings.creditsystems`.
 5 | 
 6 | The key ones are described here in more detail.
 7 | 
 8 | 
 9 | Rating Scales currently covered
10 | --------------------------------
11 | 
12 | The focus of the current selection is on *long-term issuer* rating scales (others will be added):
13 | 
14 | - AM Best Europe-Rating Services Ltd.
15 | - ARC Ratings S.A.
16 | - Cerved Rating Agency S.p.A.
17 | - Creditreform Rating AG
18 | - DBRS Ratings Limited
19 | - Fitch Ratings
20 | - Moody’s Investors Service
21 | - Scope Ratings AG
22 | - Standard & Poor’s Ratings Services
23 | 
24 | Data per Scale
25 | -------------------------------------------
26 | 
27 | Each rating scale is a StateSpace (see :ref:`State Spaces`) and thus inherits the attributes and methods of that object, namely:
28 | 
29 | - The entity defining the scale (the originating entity)
30 | - The full name of the scale (as most originators of rating scales offer multiple scales with different meaning and/or use)
31 | - The definition of the scale (as a list of tuples in the form [('0', 'X1'), ... , ('N-1', 'XN')] where X are the symbols used to denote the credit states)
32 | - The CQS (credit quality step) mapping of the scale as defined by regulatory authorities (see next section)
33 | 
34 | 
35 | CQS Mappings
36 | ------------
37 | 
38 | The Credit Quality Step (CQS) denotes a standardised indicator of Credit Risk that is recognized in the European Union
39 | 
40 | * The CQS Credit Rating Scale is based on numbers, ranging from 1 to 6.
41 | * 1 is the highest quality, 6 is the lowest quality
42 | 
43 | The European Supervisory Authorities maintain mappings between credit rating agencies and CQS
44 | 
45 | 
46 | .. note:: Consult the original documents for definitive mappings, available at the `EBA Website <https://eba.europa.eu/regulation-and-policy/external-credit-assessment-institutions-ecai/draft-implementing-technical-standards-on-the-mapping-of-ecais-credit-assessments>`_
47 | 
48 | The Rating Agency State Spaces and mappings are obtained from the latest (20 May 2019) Regulatory Reference:
49 | 
50 | ::
51 | 
52 |     JC 2018 11, FINAL REPORT: REVISED DRAFT ITS ON THE MAPPING OF ECAIS’ CREDIT ASSESSMENTS UNDER CRR
53 | 
54 | Example of Label Conversion
55 | """"""""""""""""""""""""""""
56 | Convert labels between credit rating scales
57 | 
58 | .. image:: ../../examples/scale_conversions.png
59 | 
60 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /.idea
  2 | /docs/build/html/
  3 | .env
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | pip-wheel-metadata/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 


--------------------------------------------------------------------------------
/examples/python/fix_multiperiod_matrix.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """ Example of using transitionMatrix to detect and solve various pathologies that might be affecting transition matrix data
17 | 
18 | """
19 | 
20 | import numpy as np
21 | 
22 | import transitionMatrix as tm
23 | from transitionMatrix import dataset_path
24 | 
25 | print("> Loading historical multi-period transitional matrices (cumulative mode) from csv file")
26 | SnP_Set0 = tm.TransitionMatrixSet(csv_file=dataset_path + "sp_1981-2016.csv", temporal_type='Cumulative')
27 | print("> Validate")
28 | print(SnP_Set0.validate())
29 | print(
30 |     "> We detect dimensionality problems. The matrices are not square (missing the trivial Default and NR transitions)")
31 | print("> We must fix that to proceed. Augment the matrices in the set by fixing Default and NR transitions")
32 | C_Vals = []
33 | for matrix in SnP_Set0.entries:
34 |     C = tm.TransitionMatrix(values=np.resize(matrix, (9, 9)))
35 |     # set the migration from NR or D state to a rated state to zero
36 |     C[7, 0:9] = 0.0
37 |     C[8, 0:9] = 0.0
38 |     # set the probability of remaining to a D state to unity
39 |     C[7, 7] = 100.0
40 |     # set the probability of remaining to an NR state to unity
41 |     C[8, 8] = 100.0
42 |     C_Vals.append(C)
43 | SnP_Set1 = tm.TransitionMatrixSet(values=C_Vals)
44 | print("> Validate Again")
45 | print(SnP_Set1.validate())
46 | 
47 | print("> Now we have square matrices but the format is not in probabilities!")
48 | print("> Divide all entries by 100")
49 | 
50 | SnP_Set2 = SnP_Set1 * 0.01
51 | # SnP_Set2.print()
52 | print("> Validate Again")
53 | print(SnP_Set2.validate())
54 | 
55 | print("> Hurrah, we have a probability matrix set. Lets save it")
56 | 
57 | SnP_Set2.to_json(dataset_path + 'sp_1981-2016.json', accuracy=5)
58 | 
59 | 
60 | def main():  # only prints a completion message; the example statements run at import time
61 |     print("Done")
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     main()
66 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from codecs import open
17 | 
18 | from setuptools import setup
19 | 
20 | __version__ = '0.5.1'
21 | 
22 | ver = __version__
23 | 
24 | long_descr = open('docs/source/description.rst', 'r', encoding='utf8').read()
25 | 
26 | setup(name='transitionMatrix',
27 |       version=ver,
28 |       description='A Python powered library for statistical analysis and visualization of state transition phenomena',
29 |       long_description=long_descr,
30 |       long_description_content_type='text/x-rst',
31 |       author='Open Risk',
32 |       author_email='info@openriskmanagement.com',
33 |       packages=['transitionMatrix', 'transitionMatrix.estimators', 'transitionMatrix.creditratings',
34 |                 'transitionMatrix.estimators', 'transitionMatrix.generators', 'transitionMatrix.statespaces',
35 |                 'transitionMatrix.utils', 'datasets', 'examples.python'],
36 |       include_package_data=True,
37 |       url='https://github.com/open-risk/transitionMatrix',
38 |       install_requires=[
39 |           'pandas',
40 |           'numpy',
41 |           'scipy',
42 |           'statsmodels',
43 |           'sympy',
44 |           'matplotlib'
45 |       ],
46 |       zip_safe=False,
47 |       provides=['transitionMatrix'],
48 |       classifiers=[
49 |           'Intended Audience :: Developers',
50 |           'Intended Audience :: Science/Research',
51 |           'Intended Audience :: Financial and Insurance Industry',
52 |           'Development Status :: 3 - Alpha',
53 |           'License :: OSI Approved :: Apache Software License',
54 |           'Operating System :: OS Independent',
55 |           'Programming Language :: Python :: 3 :: Only',
56 |           'Programming Language :: Python :: 3.10',
57 |           'Topic :: Scientific/Engineering',
58 |           'Topic :: Scientific/Engineering :: Information Analysis'
59 |       ]
60 | 
61 |       )
62 | 


--------------------------------------------------------------------------------
/tests/test_state_space.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import unittest
17 | 
18 | import pandas as pd
19 | from scipy.linalg import expm
20 | 
21 | import transitionMatrix as tm
22 | from transitionMatrix import source_path
23 | 
24 | ACCURATE_DIGITS = 7
25 | 
26 | 
27 | class TestStateSpace(unittest.TestCase):
28 | 
29 |     def test_instantiate_state(self):
30 |         definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
31 |                        ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]
32 |         s = tm.StateSpace(definition)
33 |         self.assertEqual(s.definition[0][1], 'AAA')
34 | 
35 |     def test_get_states(self):
36 |         definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
37 |                        ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]
38 |         s = tm.StateSpace(definition)
39 |         self.assertEqual(s.get_states()[0], '0')
40 | 
41 |     def test_get_state_labels(self):
42 |         definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
43 |                        ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]
44 |         s = tm.StateSpace(definition)
45 |         self.assertEqual(s.get_state_labels()[0], 'AAA')
46 | 
47 |     def test_generic(self):
48 |         s = tm.StateSpace()
49 |         n = 10
50 |         s.generic(n=n)
51 |         self.assertEqual(s.get_state_labels()[n-1], str(n-1))
52 | 
53 |     def test_validate_dataset(self):
54 |         dataset_path = source_path + "datasets/"
55 |         data = pd.read_csv(dataset_path + 'test.csv', dtype={'State': int})
56 |         # definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
57 |         definition = [('0', "0"), ('1', "1"), ('2', "2")]
58 |         s = tm.StateSpace(definition)
59 |         self.assertEqual(s.validate_dataset(dataset=data)[0], "Dataset contains the expected states.")
60 | 
61 | 
62 | if __name__ == "__main__":
63 | 
64 |     unittest.main()
65 | 
66 | 


--------------------------------------------------------------------------------
/datasets/synthetic_data1.csv:
--------------------------------------------------------------------------------
  1 | ID,Time,State
  2 | 0,0.032136548960821035,0
  3 | 0,0.0328444887126442,2
  4 | 0,0.1501764800072913,3
  5 | 0,0.1632084066774684,3
  6 | 0,0.2664970065502152,3
  7 | 0,0.372697250597263,0
  8 | 0,0.5924565019914576,1
  9 | 0,0.6955465948842987,1
 10 | 0,0.7363854494147481,2
 11 | 0,0.8074414166206703,3
 12 | 0,0.8963599443933054,3
 13 | 0,1.0878377742419945,2
 14 | 0,1.1675433779261155,3
 15 | 0,1.2672954215676289,3
 16 | 0,1.3109912447211465,2
 17 | 0,1.3495603879374372,2
 18 | 0,1.370858731221019,3
 19 | 0,1.5283947083954086,1
 20 | 0,1.9527668668150056,2
 21 | 0,2.0403493255815226,0
 22 | 0,2.061014920526969,0
 23 | 0,2.232040388139631,2
 24 | 0,2.23350880600617,3
 25 | 0,2.475932497508623,1
 26 | 0,2.5734462429038354,3
 27 | 0,2.8435924407409914,1
 28 | 0,3.0283948592185763,3
 29 | 0,3.0761863088790395,0
 30 | 0,3.1025963817494158,3
 31 | 0,3.1104554859797053,3
 32 | 0,3.460907110718253,2
 33 | 0,3.62096698880944,1
 34 | 0,3.9398462388242517,2
 35 | 0,4.240435835436774,1
 36 | 0,4.400104623276431,1
 37 | 0,4.46527255463521,3
 38 | 0,4.503623943006057,2
 39 | 0,4.55782375521718,1
 40 | 0,4.601418565334436,1
 41 | 0,4.643198686494953,0
 42 | 0,4.704378817300301,0
 43 | 0,4.905177094728069,3
 44 | 0,4.93505558095269,2
 45 | 0,5.0098110911321125,3
 46 | 0,5.041839513634524,0
 47 | 0,5.127515712888846,0
 48 | 0,5.130172859893318,2
 49 | 0,5.233723732230302,0
 50 | 0,5.23942731915469,3
 51 | 0,5.5196674661386735,3
 52 | 0,5.570626019796818,1
 53 | 0,5.66539210462981,1
 54 | 0,5.709583413264629,0
 55 | 0,5.719329296729598,3
 56 | 0,5.766704198443862,0
 57 | 0,5.99396769122474,3
 58 | 0,6.194187788093888,1
 59 | 0,6.208220499730838,0
 60 | 0,6.349330500400409,1
 61 | 0,6.517519409753987,3
 62 | 0,6.552156010261818,3
 63 | 0,6.65736367212415,0
 64 | 0,6.665898712154186,0
 65 | 0,6.735692210858016,1
 66 | 0,6.819200247067685,1
 67 | 0,6.845107006166676,1
 68 | 0,7.229096138465353,3
 69 | 0,7.308196732991204,0
 70 | 0,7.527652532940278,2
 71 | 0,7.865265216314766,1
 72 | 0,7.997095137774468,3
 73 | 0,8.020050939875931,1
 74 | 0,8.245600337188524,3
 75 | 0,8.458249142732146,2
 76 | 0,8.477379865694953,2
 77 | 0,8.772627059837468,1
 78 | 0,8.842277055249099,0
 79 | 0,8.949471730149508,2
 80 | 0,8.980930762708697,2
 81 | 0,9.0649434165816,1
 82 | 0,9.173044255991684,0
 83 | 0,9.26382423823099,2
 84 | 0,9.386244931606567,2
 85 | 0,9.436483889422163,1
 86 | 0,9.438836505765465,0
 87 | 0,9.568464779377974,1
 88 | 0,9.851174740848933,2
 89 | 0,10.000169627963329,0
 90 | 0,10.033727355266604,1
 91 | 0,10.096652658790777,2
 92 | 0,10.228395006959211,3
 93 | 0,10.319379569557384,1
 94 | 0,10.320643943485134,1
 95 | 0,10.378435728441596,1
 96 | 0,10.402515443650238,1
 97 | 0,10.625052026724967,1
 98 | 0,11.095139124670885,0
 99 | 0,11.111732801649607,0
100 | 0,11.15574978115486,2
101 | 0,11.182183676758042,0
102 | 


--------------------------------------------------------------------------------
/docs/source/estimators.rst:
--------------------------------------------------------------------------------
 1 | Estimation
 2 | ========================
 3 | 
 4 | The estimation of a transition matrix is one of the core functionalities of transitionMatrix. Several methods and variations are available in the literature depending on aspects such as:
 5 | 
 6 | * The nature of the observations / data (e.g., whether temporal homogeneity is a valid assumption)
 7 | * Whether or not there are competing risk effects
 8 | * Whether or not observations have coincident values
 9 | * Treating the Right-Censorship of observations (Outcomes beyond the observation window)
10 | * Treating the Left-Truncation of observations (Outcomes prior to the observation window)
11 | 
12 | Estimator Types
13 | ----------------
14 | * **Cohort Based Methods** that group observations in cohorts
15 | * **Duration** (also Hazard Rate or Intensity) Based Methods that utilize the actual duration of each state
16 | 
17 | The main estimators currently implemented are as follows:
18 | 
19 | 
20 | .. toctree::
21 |    :maxdepth: 1
22 |    :caption: Implemented Estimators
23 | 
24 |    simple_estimator
25 |    cohort_estimator
26 |    aalen-johansen_estimator
27 | 
28 | 
29 | Whichever estimator is chosen, the outcome of the estimation is an *Empirical Transition Matrix* (or potentially a matrix set).
30 | 
31 | Implementation Notes
32 | ^^^^^^^^^^^^^^^^^^^^^^
33 | 
34 | * All estimators derive from the highest level *BaseEstimator* class.
35 | * Duration type estimators derive from the *DurationEstimator* class
36 | 
37 | 
38 | Estimation Examples
39 | ----------------------
40 | 
41 | The first example of estimating a transition matrix is covered in the :ref:`Getting Started` section. Here we have a few more examples:
42 | 
43 | 
44 | Estimation Example 1
45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
46 | 
47 | Example workflows using transitionMatrix to estimate an empirical transition matrix from duration type data. The datasets are produced using examples/generate_synthetic_data.py. This example uses the
48 | `Aalen-Johansen estimator <https://www.openriskmanual.org/wiki/Aalen-Johansen_Estimator>`_
49 | 
50 | * Script: examples/python/empirical_transition_matrix.py
51 | 
52 | By setting the example variable the script covers a number of variations:
53 | 
54 | * Version 1: Credit Rating Migration example
55 | * Version 2: Simple 2x2 Matrix for testing
56 | * Version 3: Credit Rating Migration example with timestamps in raw date format
57 | 
58 | 
59 | Plot of estimated transition probabilities
60 | 
61 | .. image:: ../../examples/transition_probabilities.png
62 | 
63 | Estimation Example 2
64 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
65 | 
66 | Example workflows using transitionMatrix to estimate a transition matrix from data that are in duration format. The datasets are first grouped in period cohorts.
67 | 
68 | * Script: examples/python/matrix_from_duration_data.py
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/datasets/synthetic_data.csv:
--------------------------------------------------------------------------------
  1 | ID,Time,State
  2 | 0,0.06723778542948632,1
  3 | 0,0.09597485751071153,1
  4 | 0,0.12840651857400373,1
  5 | 0,0.08884293081078959,0
  6 | 0,0.017236797703826454,1
  7 | 0,0.04142719971495457,0
  8 | 0,0.060835676140441775,0
  9 | 0,0.14740930981852776,0
 10 | 0,0.167079439827748,1
 11 | 0,0.03147147525189556,0
 12 | 1,0.004968788531962705,1
 13 | 1,0.053293651962532565,0
 14 | 1,0.009651801128417029,1
 15 | 1,0.03048332703605733,1
 16 | 1,0.06638471310209446,0
 17 | 1,0.015776033078408927,1
 18 | 1,0.1021635631003995,0
 19 | 1,0.014436258946810497,1
 20 | 1,0.33924462590246196,1
 21 | 1,0.037562012730767716,0
 22 | 2,0.03951811328208232,1
 23 | 2,0.22774260421673156,0
 24 | 2,0.5404083467427436,0
 25 | 2,0.010541775279632345,1
 26 | 2,0.1244625928538286,1
 27 | 2,0.08656719159933775,0
 28 | 2,0.11748287413740154,0
 29 | 2,0.1996826430920634,1
 30 | 2,0.05975975335994709,1
 31 | 2,0.10746790512691595,0
 32 | 3,0.03535447307255651,1
 33 | 3,0.014017522819624196,1
 34 | 3,0.06477811805559093,0
 35 | 3,0.45733057346998296,1
 36 | 3,0.040897175945409064,1
 37 | 3,0.019462973334769346,1
 38 | 3,0.058580089535660664,0
 39 | 3,0.06769213659128215,1
 40 | 3,0.03638618673069476,0
 41 | 3,0.24097479304202274,0
 42 | 4,0.021460964952152665,0
 43 | 4,0.14254178110884028,0
 44 | 4,0.10235577729249797,1
 45 | 4,0.3566478760015271,1
 46 | 4,0.11519119147622381,0
 47 | 4,0.05284980067354113,0
 48 | 4,0.12539878956421344,1
 49 | 4,0.06759543835987696,0
 50 | 4,0.43413128766899595,1
 51 | 4,0.3302626971706799,1
 52 | 5,0.02446531950030919,1
 53 | 5,0.02143744522917154,1
 54 | 5,0.0535243521033605,1
 55 | 5,0.061210309683403956,0
 56 | 5,0.15324405096610794,0
 57 | 5,0.06922647175316272,0
 58 | 5,0.09047743285921706,1
 59 | 5,0.25173581506157733,1
 60 | 5,0.053053964822302214,1
 61 | 5,0.053615546123845594,1
 62 | 6,0.07553868934120597,0
 63 | 6,0.02186767955671588,0
 64 | 6,0.05402675765240426,1
 65 | 6,0.12432030442705991,0
 66 | 6,0.015009455687755703,1
 67 | 6,0.009406884087957135,0
 68 | 6,0.06256146033856605,0
 69 | 6,0.017157773924665463,1
 70 | 6,0.2420313204877975,1
 71 | 6,0.08117769174618861,1
 72 | 7,0.11538446507115581,0
 73 | 7,0.11580125393382101,1
 74 | 7,0.15062545305165453,0
 75 | 7,0.013037261082576352,0
 76 | 7,0.07262346101160896,1
 77 | 7,0.0032108575734070967,0
 78 | 7,0.09997016226647133,0
 79 | 7,0.1991857270316179,1
 80 | 7,0.1620064688117187,0
 81 | 7,0.05412995405001916,0
 82 | 8,0.12048582884180803,1
 83 | 8,0.07350556198860529,1
 84 | 8,0.021637712152722574,0
 85 | 8,0.10813078548139338,0
 86 | 8,0.03218653490971687,1
 87 | 8,0.10189682146066731,0
 88 | 8,0.008380510023153324,1
 89 | 8,0.07556215382911058,0
 90 | 8,0.08050993047122985,1
 91 | 8,0.011675169933270849,1
 92 | 9,0.021016620610991025,0
 93 | 9,0.010082662513525357,1
 94 | 9,0.1120830245455466,1
 95 | 9,0.06447942096025573,1
 96 | 9,0.004509385352343166,0
 97 | 9,0.08766604704313445,0
 98 | 9,0.013323721563095981,1
 99 | 9,0.06822361317027965,1
100 | 9,0.028054150357278308,0
101 | 9,0.1419454627504486,1
102 | 


--------------------------------------------------------------------------------
/datasets/sp_1981-2016.csv:
--------------------------------------------------------------------------------
 1 | From States,To States,Periods,Tenor,Tenor,Tenor,Tenor,Tenor,Tenor,Tenor,Tenor
 2 | 7,9,8,1,2,3,5,7,10,15,20
 3 | 87.05,9.03,0.53,0.05,0.08,0.03,0.05,0,3.17,,
 4 | 0.52,86.82,8,0.51,0.05,0.07,0.02,0.02,3.99,,
 5 | 0.03,1.77,87.79,5.33,0.32,0.13,0.02,0.06,4.55,,
 6 | 0.01,0.1,3.51,85.56,3.79,0.51,0.12,0.18,6.23,,
 7 | 0.01,0.03,0.12,4.97,76.98,6.92,0.61,0.72,9.63,,
 8 | 0,0.03,0.09,0.19,5.15,74.26,4.46,3.76,12.06,,
 9 | 0,0,0.13,0.19,0.63,12.91,43.97,26.78,15.39,,
10 | 75.74,16.08,1.44,0.11,0.19,0.05,0.11,0.03,6.26,,
11 | 0.91,75.47,14.17,1.31,0.19,0.15,0.02,0.06,7.73,,
12 | 0.04,3.19,77.22,9.24,0.81,0.29,0.05,0.15,9.02,,
13 | 0.02,0.19,6.43,73.67,6.01,1.13,0.22,0.52,11.82,,
14 | 0.01,0.05,0.31,8.79,59.41,10.31,1.1,2.25,17.76,,
15 | 0,0.04,0.16,0.46,8.68,55.13,5.11,8.56,21.86,,
16 | 0,0,0.17,0.54,1.08,16.61,22.03,35.53,24.03,,
17 | 65.51,22.03,2.36,0.32,0.19,0.08,0.11,0.13,9.27,,
18 | 1.21,65.83,18.69,2.11,0.36,0.23,0.03,0.13,11.42,,
19 | 0.06,4.15,68.49,11.83,1.34,0.46,0.1,0.26,13.31,,
20 | 0.02,0.29,8.54,64.33,7.08,1.69,0.3,0.91,16.83,,
21 | 0.01,0.06,0.54,11.22,46.65,11.61,1.28,4.07,24.55,,
22 | 0,0.03,0.23,0.84,10.48,41.37,4.66,12.78,29.62,,
23 | 0,0,0.14,0.61,1.65,16.62,10.9,40.68,29.39,,
24 | 49.58,28.37,4.86,0.81,0.24,0.16,0.08,0.35,15.53,,
25 | 1.49,50.29,24.87,3.71,0.59,0.39,0.04,0.34,18.26,,
26 | 0.08,5.22,54.95,15.13,2.15,0.71,0.16,0.57,21.04,,
27 | 0.03,0.47,10.51,51.02,7.68,2.29,0.4,1.93,25.68,,
28 | 0.01,0.08,1.06,12.72,30.83,11.08,1.32,7.84,35.06,,
29 | 0.01,0.03,0.28,1.63,10.55,24.83,2.99,19.25,40.42,,
30 | 0,0,0.12,0.74,2.98,12.18,2.53,46.96,34.49,,
31 | 38.31,31.58,6.99,1.5,0.3,0.19,0.11,0.53,20.49,,
32 | 1.55,39.26,28.08,4.91,0.79,0.4,0.03,0.57,24.42,,
33 | 0.08,5.46,45.54,16.73,2.71,0.86,0.15,0.98,27.5,,
34 | 0.03,0.61,10.93,42.12,7.4,2.47,0.39,3,33.03,,
35 | 0,0.09,1.43,12.5,21.96,9.75,1.06,11.17,42.04,,
36 | 0.01,0.02,0.38,2.1,8.92,15.71,1.75,24.15,46.96,,
37 | 0,0,0.23,0.97,3.51,7.95,1.48,49.51,36.34,,
38 | 26.01,32.25,9.82,2.87,0.18,0.21,0.06,0.74,27.87,,
39 | 1.32,28.24,29.36,6.82,1.05,0.44,0.03,0.83,31.9,,
40 | 0.11,5.35,35.26,17.61,3,0.99,0.14,1.61,35.92,,
41 | 0.02,0.74,10.85,32.58,6.7,2.46,0.33,4.56,41.77,,
42 | 0.02,0.07,1.79,11.24,14.64,7.75,0.67,15.39,48.43,,
43 | 0,0.04,0.45,2.57,6.87,8.67,0.9,28.71,51.8,,
44 | 0,0,0.18,0.84,3.5,4.53,0.36,50.57,40.01,,
45 | 13.3,29.98,14.94,2.86,0.61,0.44,0.03,0.92,36.9,,
46 | 0.96,16.42,27.4,9.1,1.32,0.66,0.03,1.15,42.95,,
47 | 0.12,3.99,24.47,17.49,3.21,1.21,0.16,2.71,46.64,,
48 | 0,0.77,8.35,23.14,5.49,2.51,0.26,7.65,51.83,,
49 | 0,0.14,1.99,8.31,8.25,5.23,0.44,21.81,53.83,,
50 | 0,0.07,0.51,2.43,3.67,3.89,0.47,36.94,52.02,,
51 | 0,0,0.59,1.07,2.63,1.07,0.2,59.41,35.02,,
52 | 5.72,24.84,18.94,3.59,0.93,0.93,0.04,1.38,43.61,,
53 | 0.68,9.24,22.74,11.76,1.61,0.9,0.06,1.84,51.17,,
54 | 0.11,2.81,18,15.43,3.17,1.51,0.19,3.91,54.87,,
55 | 0,0.71,6.86,18.5,4.01,1.88,0.16,9.66,58.21,,
56 | 0,0.06,1.63,6.85,4.13,3.73,0.46,24.39,58.75,,
57 | 0,0.02,0.44,2.63,2.77,2.1,0.28,36.21,55.54,,
58 | 0,0,0.36,0.72,2.15,0.54,0,56.63,39.61,,
59 | 


--------------------------------------------------------------------------------
/examples/python/estimate_matrix.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | An end-to-end example of estimating a credit rating matrix from historical data using two different estimators
17 | 
18 | """
19 | import pprint as pp
20 | 
21 | import pandas as pd
22 | from scipy.linalg import expm
23 | 
24 | import transitionMatrix as tm
25 | from transitionMatrix.estimators.aalen_johansen_estimator import AalenJohansenEstimator
26 | from transitionMatrix.estimators.cohort_estimator import CohortEstimator
27 | from transitionMatrix.statespaces.statespace import StateSpace
28 | from transitionMatrix.utils import transitions_summary
29 | from transitionMatrix.utils.converters import to_canonical
30 | 
31 | # Load the data into a pandas frame
32 | input_data = pd.read_csv('../../datasets/rating_data.csv')
33 | print('> Transitions Summary Input Data')
34 | pp.pprint(transitions_summary(input_data))
35 | 
36 | # Infer and describe state space
37 | myState = StateSpace(transition_data=input_data)
38 | myState.describe()
39 | print('> The order of states is not important for estimation but it is important for presentation!')
40 | 
41 | # Convert format to canonical form
42 | canonical_data = to_canonical(input_data)
43 | 
44 | # Group the data into temporal cohorts
45 | print(80 * '=')
46 | cohort_data, cohort_intervals = tm.utils.bin_timestamps(input_data, cohorts=5, remove_stale=True)
47 | print('Intervals : ', cohort_intervals)
48 | 
49 | print('> Transitions Summary Cohorted Data')
50 | pp.pprint(transitions_summary(cohort_data))
51 | 
52 | myEstimator = CohortEstimator(states=myState, cohort_bounds=cohort_intervals, ci={'method': 'goodman', 'alpha': 0.05})
53 | 
54 | myEstimator.fit(cohort_data)
55 | 
56 | myMatrix = tm.TransitionMatrix(myEstimator.average_matrix, states=myState)
57 | myMatrix.print_matrix(accuracy=3, format_type='Standard', labels=False)
58 | 
59 | myEstimator2 = AalenJohansenEstimator(states=myState)
60 | labels = {'Time': 'Time', 'From': 'From', 'To': 'To', 'ID': 'ID'}
61 | etm, times = myEstimator2.fit(canonical_data, labels=labels)
62 | myMatrix2 = tm.TransitionMatrix(etm[:, :, -1])
63 | G = myMatrix2.generator()
64 | oneyear = tm.TransitionMatrix(expm(0.2 * G))
65 | oneyear.print_matrix(accuracy=3)
66 | 
67 | 
68 | def main():
69 |     print("Done")
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     main()
74 | 


--------------------------------------------------------------------------------
/examples/python/matrix_lendingclub.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """
17 | Example workflow using transitionMatrix to estimate a matrix from LendingClub data
18 | Input data are in a special cohort format as the published datasets have some limitations
19 | 
20 | """
21 | 
22 | import pandas as pd
23 | 
24 | import transitionMatrix as tm
25 | from transitionMatrix import source_path
26 | from transitionMatrix.estimators import simple_estimator as es
27 | 
28 | dataset_path = source_path + "datasets/"
29 | 
30 | # Example: LendingClub Style Migration Matrix
31 | # Load historical data into pandas frame
32 | # Format:
33 | # Expected Data Format is (ID, State_IN, State_OUT)
34 | 
35 | # Step 1
36 | # Load the data set into a pandas frame
37 | # Make sure state is read as a string and not as integer
38 | print("Step 1")
39 | data = pd.read_csv(dataset_path + 'LoanStats3a_Step2.csv')
40 | # Data is in pandas frame, all pandas methods are available
41 | print(data.describe())
42 | 
43 | # Step 2
44 | # Describe and validate the State Space against the data
45 | print("Step 2")
46 | definition = [('A', "Grade A"), ('B', "Grade B"), ('C', "Grade C"),
47 |               ('D', "Grade D"), ('E', "Grade E"), ('F', "Grade F"),
48 |               ('G', "Grade G"), ('H', "Delinquent"), ('I', "Charged Off"),
49 |               ('J', "Repaid")]
50 | myState = tm.StateSpace(definition)
51 | myState.describe()
52 | labels = {'State': 'State_IN'}
53 | print(myState.validate_dataset(dataset=data, labels=labels))
54 | labels = {'State': 'State_OUT'}
55 | print(myState.validate_dataset(dataset=data, labels=labels))
56 | 
57 | # Step 3
58 | # Estimate matrices using Simple Estimator (Frequency count)
59 | # compute confidence interval using goodman method at 95% confidence level
60 | 
61 | print("Step 3")
62 | myEstimator = es.SimpleEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
63 | # resulting matrix array is returned as result
64 | result = myEstimator.fit(data)
65 | # confidence levels are stored with the estimator
66 | myEstimator.summary()
67 | 
68 | # Step 4
69 | # Review numerical results
70 | print("Step 4")
71 | myMatrix = tm.TransitionMatrix(result)
72 | myMatrix.print_matrix()
73 | 
74 | # In the LendingClub example we need to fix some matrix rows
75 | # because there are no state_IN observations besides initial grade assignment
76 | myMatrix[7, 9] = 1.0
77 | myMatrix[8, 9] = 1.0
78 | myMatrix[9, 9] = 1.0
79 | print(myMatrix.validate())
80 | print(myMatrix.characterize())
81 | myMatrix.print_matrix()
82 | 
83 | 
84 | def main():
85 |     print("Done")
86 | 
87 | 
88 | if __name__ == "__main__":
89 |     main()
90 | 


--------------------------------------------------------------------------------
/docs/source/cohorts.rst:
--------------------------------------------------------------------------------
 1 | Cohorts
 2 | ===================
 3 | 
 4 | Organizing data in `cohorts <https://www.openriskmanual.org/wiki/Cohort>`_ can be an important step in understanding transition data or towards applying a :ref:`cohort estimator`. Cohorts in this context are understood as the grouping of entities within a temporal interval.
 5 | 
 6 | For example, in a credit rating analysis context, cohorts could be groups of annual observations. The implication of cohorting data is that the more granular information embedded in a more precise timestamp is not relevant. It is also possible that input data are only available in cohort form (when the precise timestamp information is not recorded at the source)
 7 | 
 8 | 
 9 | .. note:: Cohorting can bias the estimation in various subtle ways, so it is important that any procedure is well documented.
10 | 
11 | 
12 | 
13 | Cohorting Utilities
14 | --------------------
15 | 
16 | Cohorting utilities are part of :ref:`preprocessing`. Presently the core algorithm is implemented in :func:`transitionMatrix.utils.preprocessing.bin_timestamps`.
17 | 
18 | 
19 | 
20 | 
21 | 
22 | Intermediate Cohort Data Formats
23 | -------------------------------------------
24 | 
25 | The cohort data format is a tabular representation of time series data that records the states (measurements) of multiple entities. Its defining characteristic is that each table row contains data pertaining to one entity at one point in time.
26 | 
27 | The *canonical form* used as input to duration based estimators uses normalized timestamps (from 0 to T_max, where T_max is the last timepoint) and looks as follows:
28 | 
29 |     +----+------+------+----+
30 |     | ID | Time | From | To |
31 |     +----+------+------+----+
32 |     |  1 | 1.1  |   0  | 1  |
33 |     +----+------+------+----+
34 |     |  1 | 2.0  |   1  | 2  |
35 |     +----+------+------+----+
36 |     |  1 | 3.4  |   2  | 3  |
37 |     +----+------+------+----+
38 |     |  1 | 4.0  |   3  | 2  |
39 |     +----+------+------+----+
40 |     |  2 | 1.2  |   0  | 1  |
41 |     +----+------+------+----+
42 |     |  2 | 2.4  |   1  | 2  |
43 |     +----+------+------+----+
44 |     |  2 | 3.5  |   2  | 3  |
45 |     +----+------+------+----+
46 | 
47 | Cohorting Examples
48 | ---------------------
49 | 
50 | 
51 | Cohorting Example 1
52 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
53 | 
54 | An example with limited data (dataset contains only one entity). It is illustrated in the script examples/python/matrix_from_duration_data.py with the example flag set to 1. The input data set is synthetic_data1.csv.
55 | 
56 | The state space is as follows (for brevity we work directly with the integer representation)
57 | 
58 | .. code::
59 | 
60 |     [('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")]
61 | 
62 | The cohorting algorithm that assigns the last state to the cohort results in the following table. We notice that there is a lot of movement inside each cohort (high count) and that only two of the states are represented at the cohort level (0 and 1).
63 | 
64 | .. code::
65 | 
66 |        ID  Cohort State       Time  Count
67 |     0   0       0     0   2.061015   21.0
68 |     1   0       1     1   4.400105   14.0
69 |     2   0       2     0   6.665899   28.0
70 |     3   0       3     0   8.842277   14.0
71 |     4   0       4     0  11.111733   21.0
72 |     5   0       5     0  11.182184    2.0
73 | 
74 | 


--------------------------------------------------------------------------------
/examples/python/credit_curves.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """ Compute and Visualize credit curves
17 | 
18 | """
19 | 
20 | import matplotlib.pyplot as plt
21 | 
22 | import transitionMatrix as tm
23 | from transitionMatrix.creditratings.predefined import Generic
24 | 
25 | # Initialize a single period transition matrix
26 | # Generic is a Typical Credit Rating Transition Matrix with seven rating states and one absorbing (Default) state
27 | 
28 | print("> Load the generic transition matrix")
29 | M = tm.TransitionMatrix(values=Generic)
30 | # Lets take a look at the values
31 | M.print_matrix()
32 | M.validate()
33 | 
34 | # The size of the rating scale
35 | Ratings = M.dimension
36 | 
37 | # The Default (absorbing state)
38 | Default = Ratings - 1
39 | 
40 | # Lets extend the matrix into ten periods (assume they represent annual intervals)
41 | # We do this using the power method
42 | Periods = 10
43 | print("> Extend the matrix into 10 periods using the power method")
44 | T = tm.TransitionMatrixSet(values=M, periods=Periods, method='Power', temporal_type='Cumulative')
45 | 
46 | # Lets take a look at what we have created
47 | print("> Display the calculated transition matrix set")
48 | T.print_matrix()
49 | 
50 | # Now lets compute the default curves
51 | # We do this one initial rating state at a time
52 | 
53 | # For example for the best rating (least likely to default) we obtain
54 | print("> Compute the default curves")
55 | incremental_PD, cumulative_PD, hazard_Rate, survival_Rate = T.default_curves(0)
56 | 
57 | # Construct a credit curve set
58 | credit_curves = T.default_curve_set()
59 | credit_curves.print_curve(accuracy=5)
60 | 
61 | # Now lets plot a collection of curves for all ratings
62 | print("> Plot the default curves")
63 | 
64 | curves = []
65 | periods = range(0, Periods)
66 | 
67 | for ri in range(0, Ratings - 1):
68 |     print("RI: ", ri)
69 |     iPD, cPD, hR, sR = T.default_curves(ri)
70 |     # for k in range(0, Periods):
71 |     #     value = cPD[k]
72 |     #     line = [(k, value), (k + 1.0, value)]
73 |     curves.append(cPD)
74 | 
75 | fig, ax = plt.subplots()
76 | for ri in range(0, Ratings - 1):
77 |     ax.plot(periods, curves[ri], label="RI=%d" % (ri,))
78 | 
79 | ax.autoscale()
80 | ax.margins(0.1)
81 | ax.set_xlabel("Periods")
82 | ax.set_ylabel("Cumulative Default Probability")
83 | ax.grid(True)
84 | plt.title("Credit Curves of Generic Transition Matrix")
85 | 
86 | leg = plt.legend(loc='best', ncol=2, mode="expand", shadow=True, fancybox=True)
87 | leg.get_frame().set_alpha(0.5)
88 | 
89 | plt.savefig("credit_curves.png")
90 | 
91 | 
92 | def main():
93 |     print("Done")
94 | 
95 | 
96 | if __name__ == "__main__":
97 |     main()
98 | 


--------------------------------------------------------------------------------
/tests/test_roundtrip.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | import pandas as pd
18 | 
19 | import transitionMatrix as tm
20 | from transitionMatrix import source_path
21 | from transitionMatrix.estimators import cohort_estimator as es
22 | from transitionMatrix.generators import dataset_generators
23 | from transitionMatrix.utils import to_canonical
24 | from transitionMatrix.utils.converters import to_compact
25 | 
26 | ACCURATE_DIGITS = 7
27 | 
28 | Identity = [
29 |     [1.0, 0.0, 0.0, 0.0],
30 |     [0.0, 1.0, 0.0, 0.0],
31 |     [0.0, 0.0, 1.0, 0.0],
32 |     [0.0, 0.0, 0.0, 1.0]
33 | ]
34 | 
35 | dataset_path = source_path + "datasets/"
36 | 
37 | class TestRoundTrip(unittest.TestCase):
38 |     """
39 |     Round-trip testing: Identity Matrix Markov Chain
40 |     Generate some trivial identity migrations
41 |     Check that the cohort estimator computes identity
42 | 
43 |     """
44 | 
45 |     def test_roundtrip_identity(self):
46 |         definition = [('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")]
47 |         myState = tm.StateSpace(definition)
48 |         input_data = dataset_generators.long_format(myState, Identity, n=100, timesteps=2, mode='Canonical')
49 |         compact_data = to_compact(input_data)
50 |         cohort_data, cohort_bounds = tm.utils.bin_timestamps(compact_data, cohorts=1)
51 |         sorted_data = cohort_data.sort_values(['ID', 'Time'], ascending=[True, True])
52 |         myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
53 |                                          ci={'method': 'goodman', 'alpha': 0.05})
54 |         result = myEstimator.fit(sorted_data, labels={'Time': 'Time', 'State': 'State', 'ID': 'ID'})
55 |         myMatrix = tm.TransitionMatrix(myEstimator.average_matrix)
56 | 
57 |         self.assertAlmostEqual(myMatrix[0, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
58 |         self.assertAlmostEqual(myMatrix[1, 1], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
59 |         self.assertAlmostEqual(myMatrix[2, 2], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
60 |         self.assertAlmostEqual(myMatrix[2, 2], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
61 | 
62 |     """
63 |     Round-trip testing: Data Formats
64 |     Load a data set in compact format
65 |     Convert to canonical, back to compact and compare
66 | 
67 |     """
68 | 
69 |     def test_roundtrip_formats(self):
70 |         input_data = pd.read_csv(dataset_path + 'rating_data.csv')
71 |         canonical_data = to_canonical(input_data)
72 |         compact_data = to_compact(canonical_data)
73 | 
74 |         self.assertEqual(len(compact_data.compare(input_data)), 0, msg=None)
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     unittest.main()
79 | 


--------------------------------------------------------------------------------
/examples/python/example_list.csv:
--------------------------------------------------------------------------------
 1 | Script Name,Flag,Input Data,Description
 2 | adjust_nr_state.py,1,,Adjust the NR (not-rated) statistics.
 3 | adjust_nr_state.py,2,,Adjust the NR (not-rated) statistics.
 4 | credit_curves.py,,,Compute and Visualize credit curves
 5 | characterize_datasets.py,,,Load the available datasets and compute various statistics
 6 | compare_estimators.py,,synthetic_data4.csv,Compare the cohort and aalen-johansen estimators on a discrete timestep sample
 7 | data_cleaning_example.py,,rating_data_raw.csv,Prepare transition data sets (data cleansing) using some provided methods
 8 | deterministic_paths.py,,,Create a transition dataset by replicating given trajectories through a graph
 9 | empirical_transition_matrix.py,1,synthetic_data7.csv,Credit Rating Migration example
10 | empirical_transition_matrix.py,2,synthetic_data8.csv,Simple 2x2 Matrix for testing
11 | empirical_transition_matrix.py,3,synthetic_data9.csv,Credit Rating Migration example with timestamps in raw date format
12 | estimate_matrix.py,,rating_data.csv,An end-to-end example of estimating a credit rating matrix from historical data
13 | fix_multiperiod_matrix.py,,sp_1981-2016.csv,Detect and solve various pathologies that might be affecting transition matrix data
14 | generate_full_multiperiod_set.py,,sp_NR_adjusted.json,Use infinitesimal generator methods to generate a full multi-period matrix set.
15 | generate_synthetic_data.py,1,, Generate synthetic data. The first set of examples produces duration type data.
16 | generate_synthetic_data.py,2,,The second set of examples produces cohort type data using markov chain simulation
17 | generate_synthetic_data.py,3,,The second set of examples produces cohort type data using markov chain simulation
18 | generate_visuals.py,6,JLT.json,Plot Transition Probabilities
19 | generate_visuals.py,7,JLT.json,Logarithmic Sankey Diagram of Credit Migration Rates
20 | generate_visuals.py,5,scenario_data.csv,Plot Entity Transitions Plot
21 | generate_visuals.py,1,synthetic_data1.csv,Step Plot of a single observation
22 | generate_visuals.py,4,synthetic_data3.csv,Entity Transitions Plot
23 | generate_visuals.py,2,synthetic_data4.csv,Step Plot of individual observations
24 | generate_visuals.py,3,synthetic_data5.csv,Histogram Plots of transition frequencies
25 | matrix_from_cohort_data.py,3,synthetic_data4.csv,S&P Style Credit Rating Migration Matrix
26 | matrix_from_cohort_data.py,2,synthetic_data5.csv,IFRS 9 Style Migration Matrix (Large sample for testing)
27 | matrix_from_cohort_data.py,1,synthetic_data6.csv,Simplest Absorbing Case for validation
28 | matrix_from_duration_data.py,1,synthetic_data1.csv,Duration example with limited data (dataset contains only one entity)
29 | matrix_from_duration_data.py,2,synthetic_data2.csv,"Duration example n entities with ~10 observations each, [0,1] state, 50%/50% transition matrix"
30 | matrix_from_duration_data.py,3,synthetic_data3.csv,
31 | matrix_lendingclub.py,,,Estimate a matrix from LendingClub data. Input data are in a special cohort format as the published datasets have some limitations
32 | matrix_operations.py,,,Perform various transition matrix operations illustrating the matrix algebra
33 | matrix_set_lendingclub.py,,,Estimate a matrix from LendingClub data. Input data are in a special cohort format as the published datasets have some limitations
34 | matrix_set_operations.py,,,Perform operations with multi-period transition matrix sequences
35 | state_space_operations.py,,,Examples working with state spaces (mappings)
36 | 


--------------------------------------------------------------------------------
/datasets/sp 2017.csv:
--------------------------------------------------------------------------------
 1 | AAA,87.05,5.78,2.56,0.69,0.16,0.24,0.13,0,0.05,0,0.03,0.05,0,0,0.03,0,0.05,0
 2 | AA+,2.42,77.53,11.54,3.78,0.76,0.4,0.2,0.05,0.1,0.05,0,0,0,0,0,0,0,0
 3 | AA,0.44,1.29,80.25,8.71,2.83,1.21,0.39,0.4,0.13,0.08,0.05,0.03,0.02,0.02,0,0.02,0.05,0.02
 4 | AA-,0.04,0.12,3.97,78.01,10.07,2.34,0.61,0.28,0.16,0.07,0.03,0,0,0.03,0.09,0,0,0.03
 5 | A+,0,0.06,0.48,4.58,77.51,9.1,2.29,0.66,0.35,0.09,0.06,0.1,0.01,0.07,0.03,0,0,0.05
 6 | A,0.04,0.05,0.24,0.46,5.26,78.04,7.04,2.57,0.93,0.29,0.12,0.11,0.08,0.1,0.02,0,0.02,0.06
 7 | A-,0.04,0.01,0.07,0.17,0.48,6.72,76.84,7.62,2.22,0.62,0.15,0.15,0.13,0.12,0.03,0.01,0.03,0.07
 8 | BBB+,0,0.01,0.06,0.07,0.23,0.86,7.26,74.4,8.41,1.8,0.41,0.34,0.15,0.18,0.12,0.03,0.07,0.12
 9 | BBB,0.01,0.01,0.05,0.03,0.11,0.34,1.12,7.68,75.01,6.41,1.41,0.66,0.3,0.25,0.13,0.04,0.06,0.17
10 | BBB-,0.01,0.01,0.02,0.05,0.06,0.16,0.31,1.26,9.11,71.63,5.85,2.18,0.92,0.41,0.25,0.17,0.23,0.26
11 | BB+,0.05,0,0,0.03,0.02,0.1,0.08,0.46,1.84,11.51,63.56,7.8,2.95,1.04,0.65,0.26,0.43,0.36
12 | BB,0,0,0.04,0.01,0,0.07,0.05,0.19,0.56,2.26,9.67,64.74,8.13,2.34,1.07,0.35,0.6,0.58
13 | BB-,0,0,0,0.01,0.01,0.01,0.05,0.11,0.25,0.39,1.87,9.34,63.09,8.64,3.19,0.83,0.75,1.05
14 | B+,0,0.01,0,0.03,0,0.03,0.07,0.05,0.06,0.12,0.31,1.51,8.07,63.14,8.91,2.55,1.76,2.15
15 | B,0,0,0.01,0.01,0,0.04,0.05,0.02,0.07,0.04,0.14,0.26,1.28,7.94,61.36,8.55,4.17,3.89
16 | B-,0,0,0,0,0.02,0.04,0,0.08,0.06,0.12,0.1,0.18,0.47,2.32,10.16,53.36,11.77,7.49
17 | CCC,0,0,0,0,0.03,0,0.1,0.06,0.06,0.06,0.03,0.16,0.44,1.08,2.73,9.11,43.97,26.78
18 | ,,,,,,,,,,,,,,,,,,
19 | ,,,,,,,,,,,,,,,,,,
20 | AAA,0.8705,0.0578,0.0256,0.0069,0.0016,0.0024,0.0013,0,0.0005,0,0.0003,0.0005,0,0,0.0003,0,0.0005,0
21 | AA+,0.0242,0.7753,0.1154,0.0378,0.0076,0.004,0.002,0.0005,0.001,0.0005,0,0,0,0,0,0,0,0
22 | AA,0.0044,0.0129,0.8025,0.0871,0.0283,0.0121,0.0039,0.004,0.0013,0.0008,0.0005,0.0003,0.0002,0.0002,0,0.0002,0.0005,0.0002
23 | AA-,0.0004,0.0012,0.0397,0.7801,0.1007,0.0234,0.0061,0.0028,0.0016,0.0007,0.0003,0,0,0.0003,0.0009,0,0,0.0003
24 | A+,0,0.0006,0.0048,0.0458,0.7751,0.091,0.0229,0.0066,0.0035,0.0009,0.0006,0.001,0.0001,0.0007,0.0003,0,0,0.0005
25 | A,0.0004,0.0005,0.0024,0.0046,0.0526,0.7804,0.0704,0.0257,0.0093,0.0029,0.0012,0.0011,0.0008,0.001,0.0002,0,0.0002,0.0006
26 | A-,0.0004,0.0001,0.0007,0.0017,0.0048,0.0672,0.7684,0.0762,0.0222,0.0062,0.0015,0.0015,0.0013,0.0012,0.0003,0.0001,0.0003,0.0007
27 | BBB+,0,0.0001,0.0006,0.0007,0.0023,0.0086,0.0726,0.744,0.0841,0.018,0.0041,0.0034,0.0015,0.0018,0.0012,0.0003,0.0007,0.0012
28 | BBB,0.0001,0.0001,0.0005,0.0003,0.0011,0.0034,0.0112,0.0768,0.7501,0.0641,0.0141,0.0066,0.003,0.0025,0.0013,0.0004,0.0006,0.0017
29 | BBB-,0.0001,0.0001,0.0002,0.0005,0.0006,0.0016,0.0031,0.0126,0.0911,0.7163,0.0585,0.0218,0.0092,0.0041,0.0025,0.0017,0.0023,0.0026
30 | BB+,0.0005,0,0,0.0003,0.0002,0.001,0.0008,0.0046,0.0184,0.1151,0.6356,0.078,0.0295,0.0104,0.0065,0.0026,0.0043,0.0036
31 | BB,0,0,0.0004,0.0001,0,0.0007,0.0005,0.0019,0.0056,0.0226,0.0967,0.6474,0.0813,0.0234,0.0107,0.0035,0.006,0.0058
32 | BB-,0,0,0,0.0001,0.0001,0.0001,0.0005,0.0011,0.0025,0.0039,0.0187,0.0934,0.6309,0.0864,0.0319,0.0083,0.0075,0.0105
33 | B+,0,0.0001,0,0.0003,0,0.0003,0.0007,0.0005,0.0006,0.0012,0.0031,0.0151,0.0807,0.6314,0.0891,0.0255,0.0176,0.0215
34 | B,0,0,0.0001,0.0001,0,0.0004,0.0005,0.0002,0.0007,0.0004,0.0014,0.0026,0.0128,0.0794,0.6136,0.0855,0.0417,0.0389
35 | B-,0,0,0,0,0.0002,0.0004,0,0.0008,0.0006,0.0012,0.001,0.0018,0.0047,0.0232,0.1016,0.5336,0.1177,0.0749
36 | CCC,0,0,0,0,0.0003,0,0.001,0.0006,0.0006,0.0006,0.0003,0.0016,0.0044,0.0108,0.0273,0.0911,0.4397,0.2678
37 | 


--------------------------------------------------------------------------------
/docs/source/basic_operations.rst:
--------------------------------------------------------------------------------
  1 | Basic Operations
  2 | ========================
  3 | 
  4 | The core TransitionMatrix object implements a typical (one period) transition matrix. It supports a variety of operations (more details are documented in the API section)
  5 | 
  6 | - Initialize a matrix (from data, predefined matrices etc)
  7 | - Validate a matrix
  8 | - Attempt to fix a matrix
  9 | - Compute generators, powers etc.
 10 | - Print a matrix
 11 | - Output to json/csv/xlsx formats
 12 | - Output to html format
 13 | 
 14 | 
 15 | Simple Operation Examples
 16 | ----------------------------------------
 17 | 
 18 | .. note:: The script examples/python/matrix_operations.py contains the below and plenty more simple single matrix examples
 19 | 
 20 | 
 21 | Initialize a matrix with values
 22 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 23 | 
 24 | There is a growing list of ways to initialize a transition matrix
 25 | 
 26 | * Initialize a generic matrix of dimension n
 27 | * Any list can be used for initialization (but not all shapes are valid transition matrices!)
 28 | * Any numpy array can be used for initialization (but not all are valid transition matrices!)
 29 | * Values can be loaded from json or csv files
 30 | * The transitionMatrix.creditratings.predefined module includes a number of predefined matrices
 31 | 
 32 | 
 33 | .. code::
 34 | 
 35 |     A = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]])
 36 |     print(A)
 37 |     A.print_matrix(format_type='Standard', accuracy=2)
 38 | 
 39 |     [[0.6 0.2 0.2]
 40 |      [0.2 0.6 0.2]
 41 |      [0.2 0.2 0.6]]
 42 | 
 43 |     0.60 0.20 0.20
 44 |     0.20 0.60 0.20
 45 |     0.20 0.20 0.60
 46 | 
 47 |     A.print_matrix(format_type='Percent', accuracy=1)
 48 | 
 49 |     60.0% 20.0% 20.0%
 50 |     20.0% 60.0% 20.0%
 51 |     20.0% 20.0% 60.0%
 52 | 
 53 | Both the built-in print function and the print_matrix method will print the matrix, but the print_matrix method aims to present the values in a more legible format.
 54 | 
 55 | 
 56 | General Matrix Algebra
 57 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 58 | .. note:: All standard numerical matrix operations are available as per the numpy API.
 59 | 
 60 | Some example operations that leverage the underlying numpy API:
 61 | 
 62 | .. code::
 63 | 
 64 |     E = tm.TransitionMatrix(values=[[0.75, 0.25], [0.0, 1.0]])
 65 |     print(E.validate())
 66 |     # ATTRIBUTES
 67 |     # Getting matrix info (dimensions, shape)
 68 |     print(E.ndim)
 69 |     print(E.shape)
 70 |     # Obtain the matrix transpose
 71 |     print(E.T)
 72 |     # Obtain the matrix inverse
 73 |     print(E.I)
 74 |     # Summation methods:
 75 |     # - along columns
 76 |     print(E.sum(0))
 77 |     # - along rows
 78 |     print(E.sum(1))
 79 |     # Multiplying all elements of a matrix by a scalar
 80 |     print(0.01 * A)
 81 |     # Transition Matrix algebra is very intuitive
 82 |     print(A * A)
 83 |     print(A ** 2)
 84 |     print(A ** 10)
 85 | 
 86 | 
 87 | Validating, Fixing and Characterizing a matrix
 88 | -----------------------------------------------------------
 89 | 
 90 | Validate a Matrix
 91 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 92 | 
 93 | The validate() method of the object checks for required properties of a valid transition matrix:
 94 | 
 95 |     1. check squareness
 96 |     2. check that all values are probabilities (between 0 and 1)
 97 |     3. check that all rows sum to one
 98 | 
 99 | .. code::
100 | 
101 |     C = tm.TransitionMatrix(values=[1.0, 3.0])
102 |     print(C.validate())
103 | 
104 |     [('Matrix Dimensions Differ: ', (1, 2))]
105 | 
106 | 
107 | Characterise a Matrix
108 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
109 | 
110 | The characterise() method attempts to characterise a matrix
111 | 
112 |     1. diagonal dominance


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Documentation Status](https://readthedocs.org/projects/transitionmatrix/badge/?version=latest)](https://transitionmatrix.readthedocs.io/en/latest/?badge=latest)
 2 | ![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)
 3 | [![GitHub license](https://img.shields.io/github/license/Naereen/StrapDown.js.svg)](https://github.com/Naereen/StrapDown.js/blob/master/LICENSE)
 4 | [![Percentage of issues still open](http://isitmaintained.com/badge/open/Naereen/badges.svg)](http://isitmaintained.com/project/Naereen/badges "Percentage of issues still open")
 5 | 
 6 | 
 7 | Intro
 8 | =========================
 9 | transitionMatrix is a Python powered library for the statistical analysis and visualization of state transition phenomena. It can be used to analyze any dataset that captures timestamped transitions in a discrete state space. Use cases include credit rating transitions, system state event logs etc. 
10 | 
11 | You can use transitionMatrix to
12 | 
13 | - Estimate transition matrices from historical event data using a variety of estimators
14 | - Manipulate transition matrices (generators, comparisons etc.)
15 | - Visualize event data and transition matrices
16 | - Provide standardized data sets for testing
17 | - Model transitions using threshold processes
18 | - Map credit ratings using mapping tables between popularly used rating systems 
19 | 
20 | Key Information
21 | ================
22 | 
23 | * Author: [Open Risk](http://www.openriskmanagement.com)
24 | * License: Apache 2.0
25 | * Code Documentation: [Read The Docs](https://transitionmatrix.readthedocs.io/en/latest/index.html)
26 | * Mathematical Documentation: [Open Risk Manual](https://www.openriskmanual.org/wiki/Transition_Matrix)
27 | * Development website: [Github](https://github.com/open-risk/transitionMatrix)
28 | * Project Chat: [Open Risk Commons](https://www.openriskcommons.org/c/open-source/transitionmatrix/15)
29 | 
30 | **NB: transitionMatrix is still in active development. If you encounter issues or have suggestions please raise them in our github repository or come discuss at our discourse server**
31 | 
32 | Support and Training
33 | =========================
34 | 
35 | * The Open Risk Academy has free courses demonstrating the use of the library. The current list is: 
36 |     * [Analysis of Credit Migration using Python TransitionMatrix](https://www.openriskacademy.com/course/view.php?id=38)
37 | * Support for transitionMatrix and other open source libraries developed by [Open Risk](https://www.openriskmanagement.com) is available upon request
38 | 
39 | 
40 | Examples
41 | ========
42 | 
43 | The [code documentation](https://transitionmatrix.readthedocs.io/en/latest/index.html) includes a large number of examples, jupyter notebooks and more. 
44 | 
45 | 
46 | Plotting individual transition trajectories
47 | 
48 | ![single entity](examples/single_entity.png)
49 | 
50 | Sampling transition data
51 | 
52 | ![sampled histories](examples/sampled_histories.png)
53 | 
54 | Estimation of transition matrices using cohort methods
55 | 
56 | ![estimation](examples/estimation.png)
57 | 
58 | Estimation of transition matrices using duration methods
59 | 
60 | ![transition probabilities](examples/transition_probabilities.png)
61 | 
62 | Visualization of a transition matrix
63 | 
64 | ![transition matrix](examples/TransitionMatrix.png)
65 | 
66 | Visualization using a Logarithmic Sankey diagram
67 | 
68 | ![logarithmic sankey](examples/sankey.png)
69 | 
70 | Generating stochastic process transition thresholds
71 | 
72 | ![thresholds](../portfolioAnalytics/examples/Thresholds.png)
73 | 
74 | Stressing Transition Matrices
75 | 
76 | ![stressing transition matrices](../portfolioAnalytics/examples/stressed_density.png)
77 | 
78 | Computation and Visualization of Credit Curves
79 | 
80 | ![credit curves](examples/credit_curves.png)
81 | 
82 | Working with credit states
83 | 
84 | ![image](examples/scale_conversions.png)
85 | 
86 | 


--------------------------------------------------------------------------------
/tests/test_cohort_estimator.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # (c) 2017-2024 Open Risk, all rights reserved
 4 | #
 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
 7 | # third-party software included in this distribution. You may not use this file except in
 8 | # compliance with the License.
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software distributed under
11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | import pandas as pd
18 | 
19 | import transitionMatrix as tm
20 | from transitionMatrix import source_path
21 | from transitionMatrix.estimators import cohort_estimator as es
22 | 
23 | ACCURATE_DIGITS = 2
24 | 
25 | 
26 | class TestSimpleEstimator(unittest.TestCase):
27 |     pass
28 | 
29 | 
30 | class TestCohortEstimator(unittest.TestCase):
31 |     """
32 |     Test the estimation of a simple 3x3 transition matrix with absorbing state
33 | 
34 |     .. note: The result is subject to sampling error! Ensure the required accuracy corresponds to the input data size
35 | 
36 |     """
37 | 
38 |     def test_cohort_estimator_counts(self):
39 |         """
40 |         Test that the total count constructed by the estimator is the same as the event count in the dataset
41 | 
42 |         """
43 |         dataset_path = source_path + "datasets/"
44 |         data = pd.read_csv(dataset_path + 'synthetic_data5.csv')
45 |         event_count = data['ID'].count()
46 |         # event_count = data[data['Time'] < 4]['ID'].count()
47 |         definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
48 |         myState = tm.StateSpace(definition)
49 |         sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
50 |         myEstimator = es.CohortEstimator(states=myState, cohort_bounds=[0, 1, 2, 3, 4],
51 |                                          ci={'method': 'goodman', 'alpha': 0.05})
52 |         result = myEstimator.fit(sorted_data)
53 |         self.assertEqual(event_count, myEstimator.counts)
54 | 
55 |     def test_cohort_estimator_matrix(self):
56 |         """
57 |         Test that the estimated matrix is the same as the matrix that was used to generate the data
58 | 
59 |         matrix = [[0.8, 0.15, 0.05],
60 |                   [0.1, 0.7, 0.2],
61 |                   [0.0, 0.0, 1.0]]
62 | 
63 |         """
64 |         dataset_path = source_path + "datasets/"
65 |         data = pd.read_csv(dataset_path + 'synthetic_data5.csv')
66 |         definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
67 |         myState = tm.StateSpace(definition)
68 |         sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
69 |         myEstimator = es.CohortEstimator(states=myState, cohort_bounds=[0, 1, 2, 3, 4],
70 |                                          ci={'method': 'goodman', 'alpha': 0.05})
71 |         result = myEstimator.fit(sorted_data)
72 |         am = myEstimator.average_matrix
73 |         self.assertAlmostEqual(am[0, 0], 0.8, places=ACCURATE_DIGITS, msg=None, delta=None)
74 |         self.assertAlmostEqual(am[0, 1], 0.15, places=ACCURATE_DIGITS, msg=None, delta=None)
75 |         self.assertAlmostEqual(am[0, 2], 0.05, places=ACCURATE_DIGITS, msg=None, delta=None)
76 |         self.assertAlmostEqual(am[1, 0], 0.1, places=ACCURATE_DIGITS, msg=None, delta=None)
77 |         self.assertAlmostEqual(am[1, 1], 0.7, places=ACCURATE_DIGITS, msg=None, delta=None)
78 |         self.assertAlmostEqual(am[1, 2], 0.2, places=ACCURATE_DIGITS, msg=None, delta=None)
79 |         self.assertAlmostEqual(am[2, 0], 0.0, places=ACCURATE_DIGITS, msg=None, delta=None)
80 |         self.assertAlmostEqual(am[2, 1], 0.0, places=ACCURATE_DIGITS, msg=None, delta=None)
81 |         self.assertAlmostEqual(am[2, 2], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
82 | 


--------------------------------------------------------------------------------
/examples/python/generate_full_multiperiod_set.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | """ Example of using the transitionMatrix data generator methods to generate a full multi-period matrix set
 17 | The input data are processed Standard and Poor's matrices for a selection of cumulative observation points
 18 | 
 19 | .. note:: This example requires a substantial amount of custom code!
 20 | 
 21 | """
 22 | 
 23 | from scipy.linalg import expm
 24 | 
 25 | import transitionMatrix as tm
 26 | from transitionMatrix import source_path
 27 | 
 28 | dataset_path = source_path + "datasets/"
 29 | 
 30 | print("> Loading multi-period transitional matrices (cumulative mode) from json file")
 31 | SnP_Set0 = tm.TransitionMatrixSet(json_file=dataset_path + "sp_NR_adjusted.json", temporal_type='Cumulative')
 32 | print("> Validate")
 33 | print(SnP_Set0.validate())
 34 | # SnP_Set0.print(format='Percent')
 35 | 
 36 | print("> Set the timesteps at which we have matrix observations")
 37 | # We skip the 15 and 20 year time points as they require further processing
 38 | SnP_Set0.timesteps = [1, 2, 3, 5, 7, 10]
 39 | print(SnP_Set0.timesteps)
 40 | 
 41 | # we will store the results here
 42 | timesteps = SnP_Set0.timesteps[len(SnP_Set0.timesteps) - 1]
 43 | SnP = tm.TransitionMatrixSet(dimension=8, periods=timesteps)
 44 | 
 45 | print("> Fill in the gaps between periods")
 46 | t_list = SnP_Set0.timesteps
 47 | # TODO Assumption is that first entry starts at 1
 48 | # First matrix
 49 | ts = 1
 50 | SnP.entries[ts - 1] = SnP_Set0.entries[0]
 51 | # Loop over timestep list
 52 | for k in t_list:
 53 |     i = t_list.index(k)
 54 |     # While not at the final matrix
 55 |     if i < len(t_list) - 1:
 56 |         # compute the gap period
 57 |         gap = t_list[i + 1] - t_list[i]
 58 |         # If the gap to next timestep is larger than one period
 59 |         if gap > 1:
 60 |             # Divide right matrix by left matrix to derive the forward gap transition matrix for the gap periods
 61 |             lm = SnP_Set0.entries[i]
 62 |             lm.fix_rowsums()
 63 |             rm = SnP_Set0.entries[i + 1]
 64 |             rm.fix_rowsums()
 65 |             # TODO Fix Negative probabilities for gap transition matrix
 66 |             q = rm * lm.I
 67 |             q.fix_rowsums()
 68 |             #   From gap transition matrix derive gap one-year matrices (via generator)
 69 |             #   Fill in gap years with cumulative matrices
 70 |             q.fix_negativerates()
 71 |             G = q.generator(t=gap)
 72 |             for gap_period in range(1, gap + 1):
 73 |                 gm = expm(gap_period * G)
 74 |                 cm = gm * lm
 75 |                 cm.fix_negativerates()
 76 |                 ts += 1
 77 |                 SnP.entries[ts - 1] = cm
 78 |         # There is no gap, store matrix as is
 79 |         else:
 80 |             ts += 1
 81 |             SnP.entries[ts - 1] = SnP_Set0.entries[i + 1]
 82 |     # Final matrix
 83 |     else:
 84 |         ts = timesteps
 85 |         SnP.entries[ts - 1] = SnP_Set0.entries[i]
 86 | 
 87 | SnP.timesteps = t_list
 88 | SnP.temporal_type = 'Cumulative'
 89 | SnP.print_matrix(accuracy=4)
 90 | # TODO Handle strictly zero transition probabilities
 91 | # TODO Handle non-monotonic transition probabilities
 92 | SnP.to_json(dataset_path + "sp_multiperiod.json", accuracy=8)
 93 | 
 94 | 
 95 | def main():
 96 |     print("Done")
 97 | 
 98 | 
 99 | if __name__ == "__main__":
100 |     main()
101 | 


--------------------------------------------------------------------------------
/docs/source/roadmap.rst:
--------------------------------------------------------------------------------
  1 | Roadmap
  2 | =========================
  3 | 
  4 | transitionMatrix is an ongoing project. Several significant extensions are already in the pipeline. transitionMatrix aims to become the most intuitive and versatile tool to analyse discrete transition data. The **Roadmap** lays out upcoming steps / milestones in this journey. The **Todo** list is a more granular collection of outstanding items.
  5 | 
  6 | You are welcome to contribute to the development of transitionMatrix by creating Issues or Pull Requests on the github repository. Feature requests, bug reports and any other issues can be logged at the `Github Repository <https://github.com/open-risk/transitionMatrix/issues>`_
  7 | 
  8 | Discussing general usage of the library is `happening here <https://www.openriskcommons.org/t/analysis-of-credit-migration-using-python-transitionmatrix/74>`_
  9 | 
 10 | 
 11 | 0.5
 12 | --------------------------
 13 | Version 0.5 will be the next major release (still considered alpha) and will be made available e.g. on PyPI
 14 | 
 15 | 
 16 | 0.4.X
 17 | --------------------------
 18 | 
 19 | The 0.4.X family of updates will focus on rounding out and (above all) documenting a number of functionalities already introduced
 20 | 
 21 | 
 22 | Todo List
 23 | =========================
 24 | 
 25 | A list of todo items, no triaging / prioritisation implied
 26 | 
 27 | Core Architecture and API
 28 | ---------------------------------------------------
 29 | 
 30 | - Introduce exceptions / error handling throughout
 31 | - Solve numpy.matrix deprecation (implement equivalent API in terms of ndarray)
 32 | - Complete testing framework
 33 | 
 34 | Input Data Preprocessing
 35 | ---------------------------------------------------
 36 | 
 37 | - Handling of Markov chain transition formats (single entity)
 38 | - Native handling of Wide Data Formats (concrete data sets missing)
 39 | - Generalize cohorting algorithm to user specified function
 40 | 
 41 | Reference Data
 42 | ---------------------------------------------------
 43 | 
 44 | - Additional credit rating scales (e.g short term ratings)
 45 | - Integration with credit rating ontology
 46 | 
 47 | 
 48 | Transition Matrix Analysis Functionality
 49 | ---------------------------------------------------
 50 | 
 51 | - Further validation and characterisation of transition matrices (mobility indexes)
 52 | - Generate random matrix subject to constraints
 53 | - Fixing common problems encountered by empirically estimated transition matrices
 54 | 
 55 | Statistical Analysis Functionality
 56 | ---------------------------------------------------
 57 | 
 58 | - Aalen Johansen Estimator
 59 |     - Covariance calculation
 60 |     - Various other improvements / tests
 61 | - Cohort Estimator
 62 |     - Read Data by labels
 63 |     - Edge cases
 64 | - Kaplan Meier Estimator NEW
 65 |     - (link to survival frameworks)
 66 | - Duration based methods
 67 | - Bootstrap based confidence intervals
 68 | 
 69 | 
 70 | State Space package
 71 | ---------------------------------------------------
 72 | 
 73 | - Multiple absorbing states (competing risks)
 74 | - Automated coarsening of states (merging of similar)
 75 | 
 76 | Credit Rating Related
 77 | ---------------------------------------------------
 78 | - Import data defined according to CRO ontology
 79 | - Absorbing State Identification, Competing Risks
 80 | - Compute hazard rates
 81 | - Characterize hazard rates
 82 | 
 83 | 
 84 | Utilities
 85 | ---------------------------------------------------
 86 | 
 87 | - Continuous time data generation from arbitrary chain
 88 | 
 89 | Further Refactoring of packages
 90 | ---------------------------------------------------
 91 | 
 92 | - Introduce visualization objects / API
 93 | 
 94 | 
 95 | Performance / Big data
 96 | ---------------------------------------------------
 97 | 
 98 | - Handling very large data sets, moving away from in-memory processing
 99 | 
100 | 
101 | Documentation
102 | ---------------------------------------------------
103 | - Sphinx documentation (complete)
104 | - Expand the jupyter notebook collection to (at least) match the standalone scripts
105 | 
106 | Releases / Distribution
107 | ---------------------------------------------------
108 | 
109 | - Adopt regular github/PyPI release schedule
110 | - Conda distribution
111 | 
112 | 


--------------------------------------------------------------------------------
/examples/python/matrix_operations.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | """
 17 | Examples using transitionMatrix to perform various transition matrix operations.
 18 | 
 19 | """
 20 | 
 21 | import numpy as np
 22 | from scipy.linalg import expm
 23 | 
 24 | import transitionMatrix as tm
 25 | from transitionMatrix import dataset_path
 26 | from transitionMatrix.creditratings.predefined import JLT
 27 | 
# --- Construction ------------------------------------------------------------
print("> Initialize a 3x3 matrix with values")
A = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]])
print(A)
# Print the same matrix twice, with different format_type / accuracy settings
A.print_matrix(format_type='Standard', accuracy=2)
A.print_matrix(format_type='Percent', accuracy=1)

print("> Initialize a generic matrix of dimension n")
B = tm.TransitionMatrix(dimension=4)
print(B)

# C is deliberately built from a flat list; it is validated further below and
# later rebuilt as a 2x2 matrix
print("> Any list can be used for initialization (but not all shapes are valid transition matrices!)")
C = tm.TransitionMatrix(values=[1.0, 3.0])
print(C)

print("> Any numpy array can be used for initialization (but not all are valid transition matrices!)")
D = tm.TransitionMatrix(values=np.identity(5))
print(D)

print("> Values can be loaded from json or csv files")
F = tm.TransitionMatrix(json_file=dataset_path + "JLT.json")
print(F)

# --- Validation --------------------------------------------------------------
# Print the validation result of every matrix constructed so far
print("> Validate that a matrix satisfies probability matrix properties")
print(A.validate())
print(B.validate())
print(C.validate())
print(D.validate())
print(F.validate())

# --- numpy-style attributes and methods --------------------------------------
print("> All the numpy.matrix / ndarray functionality is available")
E = tm.TransitionMatrix(values=[[0.75, 0.25], [0.0, 1.0]])
print(E.validate())
# ATTRIBUTES
# Getting matrix info (dimensions, shape)
print(E.ndim)
print(E.shape)
# Obtain the matrix transpose
print(E.T)
# Obtain the matrix inverse
print(E.I)
# Summation methods:
# - along columns
print(E.sum(0))
# - along rows
print(E.sum(1))

# --- Algebra -----------------------------------------------------------------
print("> Multiplying all elements of a matrix by a scalar")
print(0.01 * A)

print("> Transition Matrix algebra is very intuitive")
print(A * A)
print(A ** 2)
print(A ** 10)

print("> Lets fix the invalid matrix C")
# numpy operations that return numpy arrays can be used as follows:
# (C is rebound to a new 2x2 TransitionMatrix built from the resized values)
C = tm.TransitionMatrix(values=np.resize(C, (2, 2)))
# Overwrite three of the four entries before validating again
C[0, 1] = 0.0
C[1, 0] = 0.0
C[1, 1] = 1.0
print(C.validate())

# --- Generators --------------------------------------------------------------
print("> Computing the generator of a transition matrix")
# Generator of A
G = A.generator()
# Print A next to the matrix exponential of its generator for comparison
print(A, expm(G))

print("> Transition matrices properties can be analyzed")
print(A.characterize())

# --- JLT example -------------------------------------------------------------
print("> Lets look at a realistic example from the JLT paper")
# Reproduce JLT Generator
# We load it using different sources
# (E is rebound here; the 2x2 example above is no longer referenced)
E = tm.TransitionMatrix(values=JLT)
E_2 = tm.TransitionMatrix(json_file=dataset_path + "JLT.json")
E_3 = tm.TransitionMatrix(csv_file=dataset_path + "JLT.csv")
# Let's check there are no errors
# NOTE(review): only E and E_3 are compared; E_2 is loaded but not used below
Error = E - E_3
print(np.linalg.norm(Error))
print("> Lets look at validation and generators")
# Empirical matrices will not satisfy constraints exactly
print(E.validate(accuracy=1e-3))
print(E.characterize())
print(E.generator())
# Difference between E and the matrix exponential of its generator
Error = E - expm(E.generator())
# Frobenius norm
print(np.linalg.norm(Error))
# L1 norm
print(np.linalg.norm(Error, 1))

# --- Output ------------------------------------------------------------------
print("> Use pandas style API for saving to files")
# Relative file names: the files are written relative to the working directory
E.to_csv("JLT.csv")
E.to_json("JLT.json")


def main():
    """Print a completion message; called when the module is run as a script."""
    print("Done")


# All the examples above run at import time; the guard only controls main()
if __name__ == "__main__":
    main()
129 | 


--------------------------------------------------------------------------------
/transitionMatrix/utils/converters.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """ Converter utilities to help switch between various formats """
 16 | 
 17 | import pandas as pd
 18 | import numpy as np
 19 | 
 20 | 
 21 | def frame_to_array(dataframe):
 22 |     """
 23 |     Convert pandas to numpy array
 24 |     :param dataframe:
 25 |     :return:
 26 |     """
 27 |     event_count = dataframe.shape[0]
 28 |     entity_id = np.empty(event_count, int)
 29 |     entity_state = np.empty(event_count, int)
 30 |     event_time = np.empty(event_count, float)
 31 | 
 32 |     i = 0
 33 |     for row in dataframe.itertuples(index=False):
 34 |         entity_id[i] = row.ID
 35 |         event_time[i] = row.Time
 36 |         entity_state[i] = row.State
 37 |         i += 1
 38 |     return entity_id, event_time, entity_state
 39 | 
 40 | 
 41 | def datetime_to_float(dataframe, time_column='Time', format=None):
 42 |     """datetime_to_float() converts dates from string format to the canonical float format
 43 | 
 44 |     :param time_column: the column label of the observation times
 45 |     :param dataframe: Pandas dataframe with dates in string format
 46 |     :return: Pandas dataframe with dates in float format
 47 |     :rtype: object
 48 | 
 49 |     .. note:: The date string must be recognizable by the pandas to_datetime function.
 50 | 
 51 |     """
 52 | 
 53 |     dataframe[time_column] = dataframe[time_column].apply(
 54 |         lambda x: (pd.to_datetime(x, format=format)))
 55 | 
 56 |     # Find the start and end dates of the sample
 57 |     start_date = dataframe[time_column].min()
 58 |     end_date = dataframe[time_column].max()
 59 |     # Find the total days in the sample
 60 |     total_days = (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days
 61 | 
 62 |     # Apply the transformation
 63 |     # If total_days == 0 simply set to zero
 64 |     if total_days > 0:
 65 |         dataframe[time_column] = dataframe[time_column].apply(
 66 |             lambda x: (pd.to_datetime(x) - pd.to_datetime(start_date)).days / total_days)
 67 |     else:
 68 |         dataframe[time_column] = dataframe[time_column].apply(
 69 |             lambda x: 0.0
 70 |         )
 71 | 
 72 |     return [start_date, end_date, total_days], dataframe
 73 | 
 74 | 
 75 | def to_canonical(dataframe):
 76 |     """to_canonical() converts a dataframe that is in compact form into a canonical form
 77 | 
 78 |     :param dataframe:
 79 |     :return: dataframe
 80 | 
 81 |     """
 82 | 
 83 |     event_count = dataframe.shape[0]
 84 |     entity_id = np.empty(event_count, int)
 85 |     state = np.empty(event_count, int)
 86 |     event_from_state = np.empty(event_count, int)
 87 |     event_to_state = np.empty(event_count, int)
 88 |     event_time = np.empty(event_count, float)
 89 | 
 90 |     i = 0
 91 |     for row in dataframe.itertuples(index=False):
 92 |         entity_id[i] = row.ID
 93 |         event_time[i] = row.Time
 94 |         state[i] = row.State
 95 |         i += 1
 96 | 
 97 |     rows = []
 98 |     # boostrap first event
 99 |     i = 0
100 |     event_from_state[i] = state[i]
101 |     event_to_state[i] = state[i]
102 |     rows.append((entity_id[i], event_time[i], event_from_state[i], event_to_state[i]))
103 |     for i in range(1, event_count):
104 |         if entity_id[i - 1] == entity_id[i]:  # same entity transition
105 |             event_from_state[i] = event_to_state[i - 1]
106 |             event_to_state[i] = state[i]
107 |         else:  # new entity
108 |             event_from_state[i] = state[i]
109 |             event_to_state[i] = state[i]
110 | 
111 |         rows.append((entity_id[i], event_time[i], event_from_state[i], event_to_state[i]))
112 |     return pd.DataFrame(rows, columns=['ID', 'Time', 'From', 'To'])
113 | 
114 | 
def to_compact(dataframe):
    """to_compact() converts a dataframe that is in canonical form into a compact form

    The ``From`` column is dropped and the ``To`` column is relabelled
    ``State``. The input dataframe is left untouched; a new one is returned.

    :param dataframe: pandas dataframe in canonical form (with From / To columns)
    :return: dataframe in compact form (with a State column)

    """

    without_origin = dataframe.drop(columns=['From'])
    return without_origin.rename(columns={'To': 'State'})
127 | 


--------------------------------------------------------------------------------
/docs/source/description.rst:
--------------------------------------------------------------------------------
  1 | The transitionMatrix Library
  2 | =============================
  3 | 
  4 | .. image:: ../../examples/overview.png
  5 | 
  6 | transitionMatrix is a pure Python library for the statistical analysis and visualization of state transition phenomena. It can be used to analyze any dataset that captures *timestamped transitions in a discrete state space.*
  7 | 
  8 | 
  9 | * Author: `Open Risk <http://www.openriskmanagement.com>`_
 10 | * License: Apache 2.0
 11 | * Development Website: `Github <https://github.com/open-risk/transitionMatrix>`_
 12 | * Code Documentation: `Read The Docs <https://transitionmatrix.readthedocs.io/en/latest/>`_
 13 | * Mathematical Documentation: `Open Risk Manual <https://www.openriskmanual.org/wiki/Category:Transition_Matrix>`_
 14 | * Chat: `Open Risk Commons <https://www.openriskcommons.org/c/open-source/transitionmatrix/15>`_
 15 | * Training: `Open Risk Academy <https://www.openriskacademy.com/login/index.php>`_
 16 | * Showcase: `Blog Posts <https://www.openriskmanagement.com/tags/transition-matrix/>`_
 17 | 
 18 | Functionality
 19 | -------------
 20 | 
 21 | You can use transitionMatrix to:
 22 | 
 23 | - **Estimate** transition matrices from historical event data using a variety of estimators
 24 | - **Characterise** transition matrices (identify their key properties)
 25 | - **Visualize** event data and transition matrices
 26 | - **Manipulate** transition matrices (derive generators, perform comparisons, stress transition rates etc.)
 27 | - Access standardized Datasets for testing
 28 | - Extract and work with credit default curves (absorbing states)
 29 | - Map credit ratings using mapping tables
 30 | - More (still to be documented :-)
 31 | 
 32 | Architecture
 33 | ------------
 34 | 
 35 | * transitionMatrix provides intuitive objects for handling transition matrices individually and as sets (based on numpy arrays)
 36 | * supports file input/output in json and csv formats
 37 | * it has a powerful API for handling event data (based on pandas and numpy)
 38 | * supports visualization using matplotlib
 39 | 
 40 | 
 41 | Installation
 42 | =======================
 43 | 
 44 | You can install and use the transitionMatrix package in any system that supports the `Scipy ecosystem of tools <https://scipy.org/install.html>`_
 45 | 
 46 | Dependencies
 47 | -----------------
 48 | 
 49 | - TransitionMatrix requires Python 3 (currently 3.7)
 50 | - It depends on numerical and data processing Python libraries (Numpy, Scipy, Pandas).
 51 | - The Visualization API depends on Matplotlib.
 52 | - The precise dependencies are listed in the requirements.txt file.
 53 | - TransitionMatrix may work with earlier versions of Python or of these packages, but this is not tested.
 54 | 
 55 | From PyPI
 56 | -------------
 57 | 
 58 | .. code:: bash
 59 | 
 60 |     pip3 install transitionMatrix
 61 | 
 62 | From sources
 63 | -------------
 64 | 
 65 | Download the sources in your preferred directory:
 66 | 
 67 | .. code:: bash
 68 | 
 69 |     git clone https://github.com/open-risk/transitionMatrix
 70 | 
 71 | 
 72 | Using virtualenv
 73 | ----------------
 74 | 
 75 | It is advisable to install the package in a virtualenv so as not to interfere with your system's Python distribution.
 76 | 
 77 | .. code:: bash
 78 | 
 79 |     virtualenv -p python3 tm_test
 80 |     source tm_test/bin/activate
 81 | 
 82 | If you do not have pandas already installed make sure you install it first (this will also install numpy and other required dependencies).
 83 | 
 84 | .. code:: bash
 85 | 
 86 |     pip3 install -r requirements.txt
 87 | 
 88 | Finally issue the install command and you are ready to go!
 89 | 
 90 | .. code:: bash
 91 | 
 92 |     python3 setup.py install
 93 | 
 94 | File structure
 95 | -----------------
 96 | The distribution has the following structure:
 97 | 
 98 | ::
 99 | 
100 |     | transitionMatrix/     Directory with the library source code
101 |     | -- model.py           File with main data structures
102 |     | -- estimators/        Directory with the estimator methods
103 |     | -- statespaces/       Directory with state space objects and methods
104 |     | -- creditratings/     Directory with predefined credit rating structures
105 |     | -- generators/        Directory with data generator methods
106 |     | -- utils/             Directory with helper classes and methods
107 |     | -- examples/          Directory with usage examples
108 |     | ---- python/          Examples as standalone python scripts
109 |     | ---- notebooks/       Examples as jupyter notebooks
110 |     | -- datasets/          Directory with a variety of datasets useful for getting started
111 |     | -- tests/             Directory with the testing suite
112 | 
113 | 
114 | Other similar open source software
115 | -----------------------------------
116 | 
117 | - etm, an R package for estimating empirical transition matrices
118 | - msSurv, an R Package for Nonparametric Estimation of Multistate Models
119 | - msm, Multi-state modelling with R
120 | - mstate, competing risks and multistate models in R
121 | - lifelines, python survival package
122 | 


--------------------------------------------------------------------------------
/tests/test_model.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | import unittest
 17 | 
 18 | import pandas as pd
 19 | from scipy.linalg import expm
 20 | 
 21 | import transitionMatrix as tm
 22 | from transitionMatrix import source_path
 23 | 
 24 | ACCURATE_DIGITS = 7
 25 | 
 26 | 
 27 | class TestTransitionMatrix(unittest.TestCase):
 28 |     '''
 29 |     Default instance (2x2 identity matrix)
 30 |     '''
 31 |     def test_instantiate_matrix(self):
 32 |         a = tm.TransitionMatrix()
 33 |         self.assertAlmostEqual(a[0, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 34 |         self.assertAlmostEqual(a[0, 1], 0.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 35 |         self.assertAlmostEqual(a[1, 0], 0.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 36 |         self.assertAlmostEqual(a[1, 1], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 37 | 
 38 |         b = tm.TransitionMatrix([[1.0, 3.0], [1.0, 4.0]])
 39 |         self.assertAlmostEqual(b[0, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 40 |         self.assertAlmostEqual(b[0, 1], 3.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 41 |         self.assertAlmostEqual(b[1, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 42 |         self.assertAlmostEqual(b[1, 1], 4.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 43 | 
 44 |     def test_csv_io(self):
 45 |         a = tm.TransitionMatrix()
 46 |         a.to_csv("test.csv")
 47 |         b = tm.TransitionMatrix(csv_file="test.csv")
 48 |         self.assertAlmostEqual(a[0, 0], b[0, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
 49 |         self.assertAlmostEqual(a[0, 1], b[0, 1], places=ACCURATE_DIGITS, msg=None, delta=None)
 50 |         self.assertAlmostEqual(a[1, 0], b[1, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
 51 |         self.assertAlmostEqual(a[1, 1], b[1, 1], places=ACCURATE_DIGITS, msg=None, delta=None)
 52 | 
 53 |     def test_json_io(self):
 54 |         a = tm.TransitionMatrix()
 55 |         a.to_json("test.json")
 56 |         b = tm.TransitionMatrix(json_file="test.json")
 57 |         self.assertAlmostEqual(a[0, 0], b[0, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
 58 |         self.assertAlmostEqual(a[0, 1], b[0, 1], places=ACCURATE_DIGITS, msg=None, delta=None)
 59 |         self.assertAlmostEqual(a[1, 0], b[1, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
 60 |         self.assertAlmostEqual(a[1, 1], b[1, 1], places=ACCURATE_DIGITS, msg=None, delta=None)
 61 | 
 62 |     def test_validation(self):
 63 |         a = tm.TransitionMatrix()
 64 |         self.assertEqual(a.validate(), True)
 65 |         b = tm.TransitionMatrix(values=[1.0, 3.0])
 66 |         self.assertEqual(b.validate()[0][0], 'Matrix Dimensions Differ: ')
 67 |         c = tm.TransitionMatrix(values=[[0.75, 0.25], [0.0, 0.9]])
 68 |         self.assertEqual(c.validate()[0][0], 'Rowsum not equal to one: ')
 69 |         d = tm.TransitionMatrix(values=[[0.75, 0.25], [-0.1, 1.1]])
 70 |         self.assertEqual(d.validate()[0][0], 'Negative Probabilities: ')
 71 | 
 72 |     def test_generator(self):
 73 |         a = tm.TransitionMatrix([[1.0, 3.0], [1.0, 4.0]])
 74 |         self.assertAlmostEqual(a[0, 0], expm(a.generator())[0, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
 75 |         self.assertAlmostEqual(a[0, 1], expm(a.generator())[0, 1], places=ACCURATE_DIGITS, msg=None, delta=None)
 76 |         self.assertAlmostEqual(a[1, 0], expm(a.generator())[1, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
 77 |         self.assertAlmostEqual(a[1, 1], expm(a.generator())[1, 1], places=ACCURATE_DIGITS, msg=None, delta=None)
 78 | 
 79 | 
 80 | class TestTransitionMatrixSet(unittest.TestCase):
 81 | 
 82 |     def test_instantiate_matrix_set(self):
 83 |         periods = 5
 84 |         a = tm.TransitionMatrixSet(dimension=2, periods=periods)
 85 |         self.assertEqual(a.temporal_type, 'Incremental')
 86 |         self.assertAlmostEqual(a.entries[0][0, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 87 |         self.assertAlmostEqual(a.entries[periods-1][0, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None)
 88 |         pass
 89 | 
 90 |     def test_set_validation(self):
 91 |         a = tm.TransitionMatrixSet(dimension=2, periods=5)
 92 |         self.assertEqual(a.validate(), True)
 93 | 
 94 |     def test_set_cumulate_incremental(self):
 95 |         a = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]])
 96 |         a_set = tm.TransitionMatrixSet(values=a, periods=3, method='Copy', temporal_type='Incremental')
 97 |         b_set = a_set
 98 |         b_set.cumulate()
 99 |         b_set.incremental()
100 |         self.assertAlmostEqual(a_set.entries[2][0, 0], b_set.entries[2][0, 0], places=ACCURATE_DIGITS, msg=None, delta=None)
101 |         pass
102 | 
103 |     def test_set_csv_io(self):
104 |         pass
105 | 
106 |     def test_set_json_io(self):
107 |         pass
108 | 
109 | 
110 | if __name__ == "__main__":
111 | 
112 |     unittest.main()
113 | 
114 | 


--------------------------------------------------------------------------------
/transitionMatrix/estimators/simple_estimator.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from __future__ import print_function
 16 | import numpy as np
 17 | 
 18 | from transitionMatrix.estimators import BaseEstimator
 19 | import statsmodels.stats.proportion as st
 20 | 
 21 | 
 22 | class SimpleEstimator(BaseEstimator):
 23 |     """
 24 |     Class for implementing a simple estimator suitable for single period transitions
 25 | 
 26 |     This is useful for testing, getting a first feel about the transition landscape.
 27 | 
 28 |     """
 29 | 
 30 |     def __init__(self, states=None, ci=None):
 31 |         BaseEstimator.__init__(self)
 32 | 
 33 |         if states is not None:
 34 |             self.states = states
 35 |         if ci is not None:
 36 |             assert (ci['method'] in ['goodman', 'sison-glaz', 'binomial'])
 37 |             self.ci_method = ci['method']
 38 |             self.ci_alpha = ci['alpha']
 39 | 
 40 |     def fit(self, data):
 41 |         """
 42 |         Parameters
 43 |         ----------
 44 |         data : array-like
 45 |             The data to use for the estimation
 46 | 
 47 |         Returns
 48 |         -------
 49 |         matrix : estimated transition matrix
 50 |         confint_lower: lower confidence interval
 51 |         confint_upper: upper confidence interval
 52 | 
 53 |         Notes
 54 |         ------
 55 | 
 56 |         * loop over data rows
 57 |         * expected format is (id, state_in, state_out)
 58 |         * calculate population count N^i_k per state i
 59 |         * calculate migrations count N^{ij}_{kl} from i to j
 60 |         * calculate transition matrix as ratio T^{ij}_{kl} = N^{ij}_{kl} / N^i_k
 61 | 
 62 |         """
 63 | 
 64 |         # In the simple estimator all events are part of the same cohort
 65 |         state_count = self.states.cardinality
 66 |         state_list = self.states.get_states()
 67 | 
 68 |         # create storage for counts and transitions
 69 |         tm_count = np.ndarray(state_count)
 70 |         tmn_count = np.ndarray((state_count, state_count))
 71 |         tm_count.fill(0.0)
 72 |         tmn_count.fill(0.0)
 73 | 
 74 |         i = 0
 75 |         for row in data.itertuples(index=False):
 76 |             # state_in = state_list.index(row[2])
 77 |             # state_out = state_list.index(row[3])
 78 |             state_in = row[2]
 79 |             state_out = row[3]
 80 |             tm_count[state_in] += 1
 81 |             tmn_count[state_in, state_out] += 1
 82 |             i += 1
 83 | 
 84 |         self.counts = int(tm_count.sum())
 85 | 
 86 |         if self.ci_method:
 87 |             '''Confidence intervals for multinomial proportions. See the statsmodels URL
 88 |             http://www.statsmodels.org/devel/_modules/statsmodels/stats/proportion.html
 89 |     
 90 |             Parameters
 91 |             ----------
 92 |             counts : array_like of int, 1-D
 93 |                 Number of observations in each category.
 94 |             alpha : float in (0, 1), optional
 95 |                 Significance level, defaults to 0.05.
 96 |             method : {'goodman', 'sison-glaz'}, optional
 97 |                 Method to use to compute the confidence intervals; available methods
 98 |                 are:
 99 |     
100 |                  - `goodman`: based on a chi-squared approximation, valid if all
101 |                    values in `counts` are greater or equal to 5 [2]_
102 |                  - `sison-glaz`: less conservative than `goodman`, but only valid if
103 |                    `counts` has 7 or more categories (``len(counts) >= 7``) [3]_
104 |     
105 |             Returns
106 |             -------
107 |             confint : ndarray, 2-D
108 |                 Array of [lower, upper] confidence levels for each category, such that
109 |                 overall coverage is (approximately) `1-alpha`.
110 |             '''
111 | 
112 |             confint_lower = np.ndarray((state_count, state_count, 1))
113 |             confint_upper = np.ndarray((state_count, state_count, 1))
114 |             for s1 in range(state_count):
115 |                 intervals = st.multinomial_proportions_confint(tmn_count[s1, :], alpha=self.ci_alpha, method=self.ci_method)
116 |                 for s2 in range(state_count):
117 |                     confint_lower[s1, s2, 0] = intervals[s2][0]
118 |                     confint_upper[s1, s2, 0] = intervals[s2][1]
119 |             self.confint_lower = confint_lower
120 |             self.confint_upper = confint_upper
121 | 
122 |         # Normalization of counts to produce family of probability matrices
123 |         for s1 in range(state_count):
124 |             for s2 in range(state_count):
125 |                 if tm_count[s1] > 0:
126 |                     tmn_count[(s1, s2)] = tmn_count[(s1, s2)] / tm_count[s1]
127 | 
128 |         # We store and return the matrix in matrix set (but there is only one instance)
129 |         self.matrix_set.append(tmn_count)
130 | 
131 |         return self.matrix_set
132 | 


--------------------------------------------------------------------------------
/examples/python/empirical_transition_matrix.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | """
 17 | Example workflows using transitionMatrix to estimate an empirical transition matrix from duration type data. The datasets are produced in examples/generate_synthetic_data.py
 18 | 
 19 | """
 20 | import matplotlib.pyplot as plt
 21 | import numpy as np
 22 | import pandas as pd
 23 | 
 24 | import transitionMatrix as tm
 25 | from transitionMatrix import source_path
 26 | from transitionMatrix.estimators import aalen_johansen_estimator as aj
 27 | from transitionMatrix.utils.converters import datetime_to_float
 28 | 
 29 | dataset_path = source_path + "datasets/"
 30 | 
 31 | # Example 1: Credit Rating Migration example
 32 | # Example 2: Simple 2x2 Matrix for testing
 33 | # Example 3: Credit Rating Migration example with timestamps in raw date format
 34 | 
 35 | example = 3
 36 | 
 37 | # Step 1
 38 | # Load the data set into a pandas frame
 39 | # Make sure state is read as a string and not as integer
 40 | # Second synthetic data example:
 41 | # n entities with ~10 observations each, [0,1] state, 50%/50% transition matrix
 42 | print("> Step 1: Load the data set into a pandas frame")
 43 | if example == 1:
 44 |     data = pd.read_csv(dataset_path + 'synthetic_data7.csv', dtype={'State': str})
 45 | elif example == 2:
 46 |     data = pd.read_csv(dataset_path + 'synthetic_data8.csv', dtype={'State': str})
 47 | elif example == 3:
 48 |     data = pd.read_csv(dataset_path + 'synthetic_data9.csv', parse_dates=True)
 49 |     # convert datetime data to floats, return also the observation window data
 50 |     bounds, data = datetime_to_float(data)
 51 |     print('Start and End dates', bounds)
 52 | 
 53 | sorted_data = data.sort_values(['Time', 'ID'], ascending=[True, True])
 54 | print(sorted_data.head(5))
 55 | print(sorted_data.describe())
 56 | 
 57 | # Step 2
 58 | # Describe and validate the State Space against the data
 59 | print("> Step 2: Describe and validate the State Space against the data")
 60 | # We insert the expected labels of the state space
 61 | if example == 1 or example == 3:
 62 |     definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
 63 |                   ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]
 64 | elif example == 2:
 65 |     definition = [('0', "G"), ('1', "B")]
 66 | myState = tm.StateSpace(definition)
 67 | myState.describe()
 68 | # We validate that indeed the data set conforms to our expectations
 69 | labels = {'State': 'From'}
 70 | print(myState.validate_dataset(dataset=sorted_data, labels=labels))
 71 | labels = {'State': 'To'}
 72 | print(myState.validate_dataset(dataset=sorted_data, labels=labels))
 73 | 
 74 | # Step 3
 75 | # Estimate matrices using the Aalen-Johansen estimator
 76 | print("> Step 3: Estimate matrices using the Aalen-Johansen estimator")
 77 | myEstimator = aj.AalenJohansenEstimator(states=myState)
 78 | # labels = {'Timestamp': 'Time', 'From_State': 'From', 'To_State': 'To', 'ID': 'ID'}
 79 | labels = {'Time': 'Time', 'From': 'From', 'To': 'To', 'ID': 'ID'}
 80 | etm, times = myEstimator.fit(sorted_data, labels=labels)
 81 | 
 82 | # Step 4
 83 | # Print the cumulative computed matrix
 84 | print("> Step 4: Print the cumulative computed matrix")
 85 | print(etm[:, :, -1])
 86 | 
 87 | # Step 5
 88 | # Plot the estimated transition probabilities etm[ri, rf, :] against time (examples 1 and 3 only)
 89 | if example == 1 or example == 3:
 90 |     # Now let's plot a collection of curves: one panel per initial state ri, one curve per final state rf
 91 |     print("> Plot the transition curves")
 92 | 
 93 |     Periods = 10  # NOTE: not referenced anywhere in the code below
 94 |     Ratings = 8  # number of states plotted (hard-wired); must not exceed the m * n panels created below
 95 | 
 96 |     m = 4  # panel grid rows
 97 |     n = 2  # panel grid columns
 98 |     f, axarr = plt.subplots(m, n)
 99 |     f.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.90, wspace=0.0, hspace=0.1)
100 |     # plt.style.use(['ggplot'])
101 | 
102 |     for ri in range(0, Ratings):
103 |         axj = int(ri / 2)  # panel row: two panels per row
104 |         axi = ri % 2  # panel column
105 |         print(ri, axj, axi)
106 |         curves = []
107 |         for rf in range(0, Ratings):
108 |             cPD = etm[ri, rf, :]  # full time series of the ri -> rf entry
109 |             curves.append(cPD)
110 |             # axarr[axj, axi].set_aspect(5)
111 |             axarr[axj, axi].set_ylabel('State ' + str(ri), fontsize=12)
112 |             axarr[axj, axi].set_xlabel("Time")
113 |             axarr[axj, axi].plot(times[1:], curves[rf], label="RI=%d" % (rf,))  # presumably etm has one slice fewer than times - TODO confirm
114 |             # axarr[axj, axi].set_xticks(range(10), minor=False)
115 |             axarr[axj, axi].set_yticks(np.linspace(0, 1, 5), minor=False)
116 |             # axarr[axj, axi].yaxis.grid(True, which='minor')
117 |             axarr[axj, axi].margins(y=0.05, x=0.05)
118 |             axarr[axj, axi].autoscale()
119 |             axarr[axj, axi].grid(True)
120 | 
121 |     # plt.tight_layout()
122 |     f.suptitle("Multi-period Transition Probabilities", fontsize=12)
123 |     # plt.title("Multi-period Transition Probabilities")
124 |     plt.savefig("transition_probabilities.png")  # written to the current working directory
125 |     plt.show()
126 | 
127 | 
128 | def main():  # placeholder entry point: the example code above runs at module level
129 |     print("Done")
130 | 
131 | 
132 | if __name__ == "__main__":
133 |     main()
134 | 


--------------------------------------------------------------------------------
/transitionMatrix/estimators/__init__.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from __future__ import print_function
 16 | 
 17 | 
 18 | class BaseEstimator(object):
 19 | 
 20 |     """ Base class for implementing any transition matrix estimator
 21 | 
 22 |     Holds the result containers initialised in __init__ (matrix_set, count_set,
 23 |     confidence bounds, ...) and the shared printing helpers print() and summary()
 24 |     """
 25 | 
 26 |     def __init__(self):
 27 |         self.states = None  # state space object; summary() reads states.cardinality
 28 |         self.matrix_set = []  # per-period matrices, read as matrix_set[k][i, j]
 29 |         self.count_set = []  # per-period migration counts, read as count_set[k][i, j]
 30 |         self.count_normalization = []  # per-period starting counts, shown by print(select='Counts')
 31 |         self.average_matrix = []
 32 |         self.ci_alpha = None
 33 |         self.ci_method = None  # when truthy, summary() also prints the confidence bounds
 34 |         self.confint_lower = None  # read as confint_lower[i, j, k] in summary()
 35 |         self.confint_upper = None  # read as confint_upper[i, j, k] in summary()
 36 |         self.counts = None
 37 |         self.nans = None
 38 | 
 39 |     def get_matrix_set(self):  # accessor returning the stored matrix_set object itself (no copy)
 40 |         return self.matrix_set
 41 | 
 42 |     def print(self, select='Frequencies', period=None):
 43 |         """
 44 |         Print count data (select='Counts') or matrices (select='Frequencies') for one period,
 45 |         or for every stored period when period is None; any other select value prints nothing
 46 |         """
 47 |         if select == 'Counts':
 48 |             if period is not None:
 49 |                 print("Period: ", period)
 50 |                 print("Starting Count: ")
 51 |                 print(self.count_normalization[period])
 52 |                 print("Migration Counts: ")
 53 |                 print(self.count_set[period][:, :])
 54 |             else:
 55 |                 for k in range(len(self.count_set)):
 56 |                     print("Period: ", k)
 57 |                     print("Starting Count: ")
 58 |                     print(self.count_normalization[k])
 59 |                     print("Migration Counts: ")
 60 |                     print(self.count_set[k][:, :])
 61 |         elif select == 'Frequencies':
 62 |             if period is not None:
 63 |                 print("Period: ", period)
 64 |                 print(self.matrix_set[period][:, :])
 65 |             else:
 66 |                 for k in range(len(self.matrix_set)):
 67 |                     print("Period: ", k)
 68 |                     print(self.matrix_set[k][:, :])
 69 | 
 70 |         return
 71 | 
 72 |     def summary(self, k=0):
 73 |         """
 74 |         Pretty-print every entry of matrix_set[k], with lower / upper confidence bounds when ci_method is set
 75 |         """
 76 |         if self.ci_method:
 77 |             state_count = self.states.cardinality
 78 |             print('                      Transition Matrix Estimation Results                    ')
 79 |             print('==============================================================================')
 80 |             print('Confidence Level: ', self.ci_alpha)
 81 |             print('Confidence Level Method: ', self.ci_method)
 82 |             print('------------------------------------------------------------------------------')
 83 |             print('Row  Col  Lower Bound      Value   Upper Bound')
 84 |             for s1 in range(state_count):
 85 |                 for s2 in range(state_count):
 86 |                     lv = self.confint_lower[s1, s2, k]
 87 |                     rv = self.confint_upper[s1, s2, k]
 88 |                     cv = self.matrix_set[k][s1, s2]
 89 |                     print('{0:3} {1:4} {2:12f} {3:10f} {4:12f}'.format(s1, s2, lv, cv, rv))
 90 |                 print('..............................................................................')
 91 |             print('==============================================================================')
 92 |         else:
 93 |             state_count = self.states.cardinality
 94 |             print('                      Transition Matrix Estimation Results                    ')
 95 |             print('==============================================================================')
 96 |             print('Row  Col  Value')
 97 |             for s1 in range(state_count):
 98 |                 for s2 in range(state_count):
 99 |                     cv = self.matrix_set[k][s1, s2]
100 |                     print('{0:3} {1:4} {2:10f}'.format(s1, s2, cv))
101 |                 print('..............................................................................')
102 |             print('==============================================================================')
103 |         return
104 | 
105 | 
106 | class DurationEstimator(BaseEstimator):
107 | 
108 |     """ Base class for implementing any duration based transition matrix estimator
109 | 
110 |     Offers methods common to all duration based estimators.
111 |     Two subclasses:
112 | 
113 |     * Time homogeneous estimator (constant transition rates)
114 |     * Time inhomogeneous estimator (variable transition probabilities), e.g. Aalen-Johansen
115 | 
116 |     Transition matrices are measured from the start: T(s, t) = T(0, t)  (start = 0).
117 |     For each transition time t_k the matrix T^ij(t_k) is held in a numpy array indexed (i, j, k).
118 | 
119 |     Transitions at cohort intervals are approximated by the slice (i, j, k_index), where
120 |     k_index is the largest k whose transition time is less than the interval boundary.
121 | 
122 |     """
123 | 
124 |     def __init__(self, cohort_intervals=None, states=None):
125 |         BaseEstimator.__init__(self)  # creates the shared result containers; sets self.states to None
126 |         self.cohort_intervals = cohort_intervals
127 |         if states is not None:  # keep the base-class default unless a state space is supplied
128 |             self.states = states
129 |         self.timepoint_count = None
130 | 


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
  1 | ChangeLog
  2 | ===========================
  3 | 
  4 | PLEASE NOTE THAT THE API OF TRANSITION MATRIX IS STILL UNSTABLE AS MORE USE CASES / FEATURES ARE ADDED REGULARLY
  5 | 
  6 | v0.5.2 (XX-12-2024)
  7 | --------------------
  8 | * Documentation: Streamlining visualization workflows (issue #12)
  9 | 
 10 | v0.5.1 (29-09-2023)
 11 | --------------------
 12 | * Installation:
 13 |     * Bump python dependency to 3.10
 14 | 
 15 | v0.5.0 (21-02-2022)
 16 | -------------------
 17 | * Installation:
 18 |     * Bump python dependency to 3.7
 19 |     * PyPI release update
 20 | 
 21 | v0.4.9 (04-05-2021)
 22 | -------------------
 23 | 
 24 | * Refactoring: All non-core functionality moved to separate directories/sub-packages
 25 |     * credit curve stuff moved to credit ratings modules
 26 |     * data generators moved to generators modules
 27 |     * etc.
 28 | * Documentation: Major expansion (Still incomplete)
 29 |     * Expanded Data Formats
 30 |     * Rating Scales, CQS etc
 31 |     * Listing all datasets and examples
 32 | * Testing / Training: An interesting use case raised as issue #20
 33 |     * Added an end-to-end example of estimating a credit rating matrix from raw data
 34 |     * Includes various data preprocessing examples
 35 | * Datasets:
 36 |     * rating_data.csv (cleaned up credit data)
 37 |     * synthetic_data10.csv Credit Rating Migrations in Long Format / Compact Form (for testing)
 38 |     * deterministic generator (replicate given trajectories)
 39 | * Tests:
 40 |     * test_roundtrip.py testing via round-tripping methods
 41 | 
 42 | 
 43 | v0.4.8 (07-02-2021)
 44 | -------------------
 45 | 
 46 | * Documentation: Pulled all rst files in docs
 47 | * Refactoring: credit rating data moved into separate module
 48 | 
 49 | 
 50 | v0.4.7 (29-09-2020)
 51 | -------------------
 52 | 
 53 | * Documentation: Expanded and updated description of classes
 54 | * Documentation: Including Open Risk Academy code examples
 55 | * Feature: logarithmic sankey visualization
 56 | 
 57 | v0.4.6 (22-05-2019)
 58 | -------------------
 59 | 
 60 | * Feature: Update of CQS Mappings, addition of new rating scales
 61 | * Documentation: Documentation of rating scale structure and mappings
 62 | * Training: Example of mapping portfolio data to CQS
 63 | 
 64 | v0.4.5 (21-04-2019)
 65 | -------------------
 66 | 
 67 | * Training: Monthly_from_Annual.ipynb (a Jupyter notebook illustrating how to interpolate transition rates on monthly intervals)
 68 | * Datasets: generic_monthly.json
 69 | * Feature: print_matrix function for generic matrix pretty printing
 70 | * Feature: matrix_exponent function for obtaining arbitrary integral matrices from a given generator
 71 | 
 72 | v0.4.4 (03-04-2019)
 73 | -------------------
 74 | 
 75 | * Documentation: Cleanup of docs following separation of threshold / portfolio models
 76 | * Datasets: generic_multiperiod.json
 77 | * Feature: CreditCurve class for holding credit curves
 78 | 
 79 | 
 80 | v0.4.3 (29-03-2019)
 81 | -------------------
 82 | 
 83 | * Refactoring: Significant rearrangement of code (the threshold models package moved to portfolioAnalytics for more consistent structure of the code base / functionality)
 84 | 
 85 | v0.4.2 (29-01-2019)
 86 | -------------------
 87 | 
 88 | * Feature: converter function in transitionMatrix.utils.converters to convert long form dataframes into canonical float form
 89 | * Datasets: synthetic_data9.csv (datetime in string format)
 90 | * Training: new data generator in examples/generate_synthetic_data.py to generate long format with string dates
 91 | * Training: Additional example (=3) in examples/empirical_transition_matrix.py to process long format with string dates
 92 | * Documentation: More detailed explanation of Long Data Formats with links to Open Risk Manual
 93 | * Documentation: Enabled sphinx.ext.autosectionlabel for easy internal links / removed duplicate labels
 94 | 
 95 | v0.4.1 (31-10-2018)
 96 | -------------------
 97 | 
 98 | * Feature: Added functionality for conditioning multi-period transition matrices
 99 | * Training: Example calculation and visualization of conditional matrices
100 | * Datasets: State space description and CQS mappings for top-6 credit rating agencies
101 | 
102 | 
103 | v0.4.0 (23-10-2018)
104 | -------------------
105 | 
106 | * Installation: First PyPI and wheel installation options
107 | * Feature: Added Aalen-Johansen Duration Estimator
108 | * Documentation: Major overhaul of documentation, now targeting ReadTheDocs distribution
109 | * Training: Streamlining of all examples
110 | * Datasets: Synthetic Datasets in long format
111 | 
112 | v0.3.1 (21-09-2018)
113 | -------------------
114 | 
115 | * Feature: Expanded functionality to compute and visualize credit curves
116 | 
117 | v0.3 (27-08-2018)
118 | -------------------
119 | 
120 | * Feature: Addition of portfolio models (formerly portfolio_analytics_library) for data generation and testing
121 | * Training: Added examples in jupyter notebook format
122 | 
123 | v0.2 (05-06-2018)
124 | -------------------
125 | 
126 | * Feature: Addition of threshold generation algorithms
127 | 
128 | v0.1.3 (04-05-2018)
129 | -------------------
130 | 
131 | * Documentation: Sphinx based documentation
132 | * Training: Additional visualization examples
133 | 
134 | v0.1.2 (05-12-2017)
135 | -------------------
136 | 
137 | * Refactoring: Dataset paths
138 | * Bugfix: Correcting requirement dependencies (missing matplotlib)
139 | * Documentation: More detailed instructions
140 | 
141 | v0.1.1 (03-12-2017)
142 | -------------------
143 | 
144 | * Feature: TransitionMatrix model: new methods to merge States, fix problematic probability matrices, I/O APIs
145 | * Feature: TransitionMatrixSet model: json and csv readers, methods for set-wise manipulations
146 | * Datasets: Additional multiperiod datasets (Standard and Poors historical corporate rating transition rates)
147 | * Feature: Enhanced matrix comparison functionality
148 | * Training: Three additional example workflows
149 |     * fixing multiperiod matrices (completing State Space)
150 |     * adjusting matrices for withdrawn entries
151 |     * generating full multi-period sets from limited observations
152 | 
153 | v0.1.0 (11-11-2017)
154 | -------------------
155 | 
156 | * First public release of the package


--------------------------------------------------------------------------------
/examples/python/matrix_from_duration_data.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | """
 17 | Example workflows using transitionMatrix to estimate a matrix from duration type data
 18 | The datasets are produced in examples/generate_synthetic_data.py
 19 | 
 20 | """
 21 | 
 22 | import pandas as pd
 23 | 
 24 | import transitionMatrix as tm
 25 | from transitionMatrix import source_path
 26 | from transitionMatrix.estimators import cohort_estimator as es
 27 | from transitionMatrix.utils.converters import datetime_to_float
 28 | 
 29 | dataset_path = source_path + "datasets/"
 30 | 
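 31 | # Select the example to run (1-3 are described below; 4 reads synthetic_data10.csv and converts its dates with datetime_to_float)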
 32 | # 1-> An example with limited data (dataset contains only one entity)
 33 | # 2-> A full example with a 2x2 matrix
 34 | # 3-> A full example with a 8x8 matrix
 35 | 
 36 | example = 1
 37 | 
 38 | if example == 1:
 39 | 
 40 |     # An example with limited data (dataset contains only one entity)
 41 |     data = pd.read_csv(dataset_path + 'synthetic_data1.csv', dtype={'State': str})
 42 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
 43 |     myState = tm.StateSpace([('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")])
 44 |     print("> Validate data set")
 45 |     print(myState.validate_dataset(dataset=sorted_data))
 46 |     # Bin the data into 5 intervals
 47 |     cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)
 48 |     print("> Cohort intervals: ", cohort_intervals)
 49 |     print(80 * '=')
 50 |     print("> Cohort data")
 51 |     print(cohort_data)
 52 |     myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
 53 |     labels = {'Time': 'Cohort', 'State': 'State', 'ID': 'ID'}
 54 |     print(80 * '=')
 55 |     result = myEstimator.fit(cohort_data, labels=labels)
 56 |     print(80 * '=')
 57 |     print("> Display results")
 58 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
 59 |     print(myMatrixSet.temporal_type)
 60 |     myMatrixSet.print_matrix()
 61 | 
 62 | 
 63 | elif example == 2:
 64 | 
 65 |     # Step 1
 66 |     # Load the data set into a pandas frame
 67 |     # Make sure state is read as a string and not as integer
 68 |     # Second synthetic data example:
 69 |     # n entities with ~10 observations each, [0,1] state, 50%/50% transition matrix
 70 |     print("> Step 1: Load the data")
 71 |     data = pd.read_csv(dataset_path + 'synthetic_data2.csv', dtype={'State': str})
 72 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
 73 |     print(sorted_data.describe())
 74 | 
 75 |     # Step 2
 76 |     # Describe and validate the State Space against the data
 77 |     print("> Step 2: Validate against state space")
 78 |     myState = tm.StateSpace([('0', "Basic"), ('1', "Default")])
 79 |     myState.describe()
 80 |     print(myState.validate_dataset(dataset=sorted_data))
 81 | 
 82 |     # Step 3
 83 |     # Arrange the data in period cohorts
 84 |     print("> Step 3: Arrange the data in period cohorts")
 85 |     cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)
 86 | 
 87 |     # Step 4
 88 |     # Estimate matrices using method of choice
 89 |     # compute confidence interval using goodman method at 95% confidence level
 90 |     print("> Step 4: Estimate matrices")
 91 |     myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
 92 |     labels = {'Timestamp': 'Cohort', 'State': 'State', 'ID': 'ID'}
 93 |     result = myEstimator.fit(cohort_data, labels=labels)
 94 | 
 95 |     # Step 5
 96 |     # Print out the set of estimated matrices
 97 |     print("> Step 5: Display results")
 98 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
 99 |     print(myMatrixSet.temporal_type)
100 |     myMatrixSet.print_matrix()
101 | 
102 | 
103 | elif example == 3:
104 | 
105 |     data = pd.read_csv(dataset_path + 'synthetic_data3.csv', dtype={'State': str})
106 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
107 |     myState = tm.StateSpace([('0', "A"), ('1', "B"), ('2', "C"), ('3', "D"), ('4', "E"), ('5', "F"), ('6', "G")])
108 |     print(myState.validate_dataset(dataset=sorted_data))
109 |     cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)
110 |     myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
111 |     labels = {'Time': 'Cohort', 'State': 'State', 'ID': 'ID'}
112 |     result = myEstimator.fit(cohort_data, labels=labels)
113 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
114 |     myMatrixSet.print_matrix()
115 | 
116 | elif example == 4:
117 | 
118 |     data = pd.read_csv(dataset_path + 'synthetic_data10.csv', dtype={'State': str})
119 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
120 |     myState = tm.StateSpace(transition_data=sorted_data)
121 |     myState.describe()
122 |     print(myState.validate_dataset(dataset=sorted_data))
123 |     [start_date, end_date, total_days], data = datetime_to_float(sorted_data)
124 |     print(data.head())
125 |     cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=6)
126 |     myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
127 |     print(cohort_data.head())
128 |     result = myEstimator.fit(cohort_data, labels={'Time': 'Cohort', 'State': 'State', 'ID': 'ID'})
129 |     myMatrix = tm.TransitionMatrix(myEstimator.average_matrix)
130 |     myMatrix.print_matrix(accuracy=3)
131 | 
132 | 
133 | def main():  # placeholder entry point: the selected example above runs at module level
134 |     print("Done")
135 | 
136 | 
137 | if __name__ == "__main__":
138 |     main()
139 | 


--------------------------------------------------------------------------------
/examples/python/data_cleaning_example.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import pprint as pp
 16 | 
 17 | import pandas as pd
 18 | 
 19 | from transitionMatrix.utils.converters import frame_to_array, datetime_to_float
 20 | from transitionMatrix.utils.preprocessing import transitions_summary, validate_absorbing_state
 21 | 
 22 | """ Examples of using transitionMatrix to prepare data sets (data cleansing). The functionality is primarily based on pandas, with transition data specific procedures supported by the utils sub-package. For some operations (and large datasets) it might be advisable to work with numpy arrays
 23 | 
 24 | """
 25 | 
 26 | # Load the raw data into a pandas frame
 27 | raw_data = pd.read_csv('../../datasets/rating_data_raw.csv')
 28 | 
 29 | # Print a generic summary based on pandas describe() method
 30 | print(raw_data.describe())
 31 | 
 32 | # Bring the column names to a standard convention
 33 | raw_data.rename(columns={"RatingNum": "State", "Date": "Time", "CustomerId": "ID"}, inplace=True)
 34 | 
 35 | print(raw_data.head())
 36 | 
 37 | # Print a summary of transition statistics
 38 | pp.pprint(transitions_summary(raw_data))
 39 | 
 40 | # Drop redundant column
 41 | raw_data = raw_data.drop(columns=['Rating'])
 42 | 
 43 | # Move the NR state to the end of the scale: relabel state 0 as 8 and shift states 1..8 down by one
 44 | reorder_dict = {
 45 |     0: 8,
 46 |     1: 0,
 47 |     2: 1,
 48 |     3: 2,
 49 |     4: 3,
 50 |     5: 4,
 51 |     6: 5,
 52 |     7: 6,
 53 |     8: 7
 54 | }
 55 | raw_data = raw_data.replace({"State": reorder_dict})
 56 | 
 57 | print(raw_data.head(10))
 58 | 
 59 | # Convert date strings to floats
 60 | [start_date, end_date, total_days], converted_data = datetime_to_float(raw_data, time_column='Time')
 61 | print([start_date, end_date, total_days])
 62 | 
 63 | # NB: In the below the D = 7, NR = 8 special states are hardwired
 64 | 
 65 | # remove an initial observation for an entity if it is classified as D
 66 | # Reason: an initial defaulted observation is unusual / non-sensical
 67 | rows = []
 68 | entity_id, event_time, entity_state = frame_to_array(converted_data)
 69 | for i in range(len(entity_id)):
 70 |     if entity_id[i - 1] != entity_id[i] and entity_state[i] == 7:
 71 |         pass
 72 |     else:
 73 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
 74 | clean_data0 = pd.DataFrame(rows, columns=['ID', 'Time', 'State'])
 75 | 
 76 | # remove an initial observation for an entity if it is classified as NR
 77 | # Reason: left truncation of observations must be handled consistently
 78 | 
 79 | rows = []
 80 | entity_id, event_time, entity_state = frame_to_array(clean_data0)
 81 | for i in range(len(entity_id)):
 82 |     if entity_id[i - 1] != entity_id[i] and entity_state[i] == 8:
 83 |         pass
 84 |     else:
 85 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
 86 | clean_data1 = pd.DataFrame(rows, columns=['ID', 'Time', 'State'])
 87 | 
 88 | 
 89 | # remove an intermediate observation for an entity if it is classified as NR
 90 | # Reason: it is non-informative and it complicates the handling of NR state (non-absorbing)
 91 | rows = []
 92 | entity_id, event_time, entity_state = frame_to_array(clean_data1)
 93 | for i in range(len(entity_id) - 1):
 94 |     if entity_id[i + 1] == entity_id[i] and entity_state[i] == 8 and entity_state[i + 1] != 8:
 95 |         pass
 96 |     else:
 97 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
 98 | clean_data2 = pd.DataFrame(rows, columns=['ID', 'Time', 'State'])
 99 | 
100 | # remove an intermediate observation for an entity if it is classified as D
101 | # Reason: this is (presumably) a 're-emergence from default' type event. complicates the handling of D state (non-absorbing)
102 | rows = []
103 | entity_id, event_time, entity_state = frame_to_array(clean_data2)
104 | for i in range(len(entity_id) - 1):
105 |     if entity_id[i + 1] == entity_id[i] and entity_state[i] == 7 and entity_state[i + 1] != 7:
106 |         pass
107 |     else:
108 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
109 | clean_data3 = pd.DataFrame(rows, columns=['ID', 'Time', 'State'])
110 | 
111 | # remove NR observations of defaulted entities
112 | # Reason: non-informative, ensure D is truly an absorbing state
113 | # (NB: the labels 0, 8 are hardwired for this data set)
114 | rows = []
115 | entity_id, event_time, entity_state = frame_to_array(clean_data3)
116 | 
117 | for i in range(len(entity_id)):
118 |     if entity_state[i] == 8 and entity_state[i - 1] == 7:
119 |         pass
120 |     else:
121 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
122 | clean_data4 = pd.DataFrame(rows, columns=['ID', 'Time', 'State'])
123 | 
124 | # check that NR and D are absorbing states
125 | print(validate_absorbing_state(clean_data4, 7))
126 | print(validate_absorbing_state(clean_data4, 8))
127 | 
128 | pp.pprint(transitions_summary(clean_data4))
129 | 
130 | # if the first entry is not at the earliest global observation timepoint, add the initial observation
131 | # this assumption removes left truncation condition but may bias the data
132 | # NB: 0.0 is hardwired as left observation window
133 | rows = []
134 | entity_id, event_time, entity_state = frame_to_array(clean_data4)
135 | for i in range(len(entity_id)):
136 |     if entity_id[i - 1] != entity_id[i] and event_time[i] > 0:
137 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
138 |         rows.append((entity_id[i], 0.0, entity_state[i]))
139 |     else:
140 |         rows.append((entity_id[i], event_time[i], entity_state[i]))
141 | 
142 | clean_data = pd.DataFrame(rows, columns=['ID', 'Time', 'State'])
143 | 
144 | # Sort by entity ID, then event Time
145 | sorted_data = clean_data.sort_values(['ID', 'Time'], ascending=[True, True])
146 | 
147 | pp.pprint(transitions_summary(sorted_data))
148 | sorted_data.to_csv('../../datasets/rating_data.csv', index=False)
149 | 


--------------------------------------------------------------------------------
/examples/python/matrix_from_cohort_data.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk, all rights reserved
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | """
 17 | Example workflows using transitionMatrix to estimate a transition matrix from data in cohort format
 18 | 
 19 | """
 20 | 
 21 | import pprint as pp
 22 | 
 23 | import pandas as pd
 24 | 
 25 | import transitionMatrix as tm
 26 | from transitionMatrix import source_path
 27 | from transitionMatrix.creditratings.creditsystems import Generic_SS
 28 | from transitionMatrix.estimators import cohort_estimator as es
 29 | from transitionMatrix.utils.preprocessing import transitions_summary, unique_timestamps
 30 | 
 31 | dataset_path = source_path + "datasets/"
 32 | 
 33 | # Select the example to run
 34 | # 1-> The Simplest Absorbing Case (for validation)
 35 | # 2-> An IFRS 9 Style 3x3 Migration Matrix
 36 | # 3-> S&P Style Credit Rating Migration Matrix
 37 | 
 38 | example = 3
 39 | 
 40 | if example == 3:
 41 |     # Example 3: S&P Style Credit Rating Migration Matrix
 42 | 
 43 |     # S&P Ratings State Space
 44 |     # definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
 45 |     #               ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]
 46 | 
 47 |     myState = Generic_SS
 48 | 
 49 |     print("> Describe state space")
 50 |     myState.describe()
 51 |     print("> List of states")
 52 |     print(80 * '-')
 53 |     print(myState.get_states())
 54 |     print("> List of state labels")
 55 |     print(80 * '-')
 56 |     print(myState.get_state_labels())
 57 | 
 58 |     print("> Load Dataset")
 59 |     data = pd.read_csv(dataset_path + 'synthetic_data4.csv', dtype={'State': str})
 60 | 
 61 |     print("> Transitions Summary")
 62 |     print(80 * '-')
 63 |     pp.pprint(transitions_summary(data))
 64 | 
 65 |     print("> Sort and Validate dataset")
 66 |     print(80 * '-')
 67 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
 68 |     print(myState.validate_dataset(dataset=sorted_data))
 69 | 
 70 |     # compute confidence interval using goodman method at 95% confidence level
 71 |     print("> Cohort Estimator")
 72 |     print(80 * '-')
 73 |     cohort_bounds = unique_timestamps(sorted_data)
 74 |     myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
 75 |                                      ci={'method': 'goodman', 'alpha': 0.05})
 76 |     result = myEstimator.fit(sorted_data)
 77 | 
 78 |     # Print confidence intervals
 79 |     print("> Compute confidence interval using goodman method at 95% confidence level")
 80 |     myEstimator.summary()
 81 | 
 82 |     # Print the estimated results
 83 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
 84 |     # print(myMatrixSet.temporal_type)
 85 |     print("> Print Estimated Matrix Set")
 86 |     myMatrixSet.print_matrix()
 87 | 
 88 | elif example == 2:
 89 |     # Example 2: IFRS 9 Style Migration Matrix
 90 |     # Format: discrete time grid (already arranged in cohorts)
 91 | 
 92 |     # Step 1
 93 |     # Load the data set into a pandas frame
 94 |     # Make sure state is read as a string and not as integer
 95 |     # Fifth synthetic data example: IFRS 9 Migration Matrix
 96 |     print(">>> Step 1: Data Loading")
 97 |     data = pd.read_csv(dataset_path + 'synthetic_data5.csv', dtype={'State': str})
 98 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
 99 |     # Data is a pandas frame, all methods are available
100 |     print(sorted_data.describe())
101 | 
102 |     # Step 2
103 |     # Describe and validate the State Space against the data
104 |     # We create a mock IFRS 9 state space (three stage assets)
105 |     print(">>> Step 2: Diagnostics")
106 |     definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
107 |     myState = tm.StateSpace(definition)
108 |     myState.describe()
109 |     print(myState.validate_dataset(dataset=sorted_data))
110 | 
111 |     # Step 3
112 |     # Estimate matrices using method of choice
113 |     # compute confidence interval using goodman method at 95% confidence level
114 |     print(">>> Step 3: Estimation")
115 |     cohort_bounds = unique_timestamps(sorted_data)
116 |     myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
117 |                                      ci={'method': 'goodman', 'alpha': 0.05})
118 |     # myMatrix = matrix.CohortEstimator(states=myState)
119 |     result = myEstimator.fit(sorted_data)
120 |     myEstimator.summary()
121 | 
122 |     print(">>> Step 4: Average Matrix")
123 |     print(myEstimator.average_matrix)
124 | 
125 |     # Step 4
126 |     # Review full set of numerical results
127 |     print(">>> Step 5")
128 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
129 |     print(myMatrixSet.temporal_type)
130 |     myMatrixSet.print_matrix()
131 | 
132 | elif example == 1:
133 |     # Example 1: Simplest Absorbing Case for validation
134 |     data = pd.read_csv(dataset_path + 'synthetic_data6.csv', dtype={'State': str})
135 |     sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
136 |     myState = tm.StateSpace()
137 |     myState.generic(2)
138 |     print(80 * '-')
139 |     print('State Space Validation:')
140 |     print(myState.validate_dataset(dataset=sorted_data))
141 |     cohort_bounds = unique_timestamps(sorted_data)
142 |     myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
143 |                                      ci={'method': 'goodman', 'alpha': 0.05})
144 |     result = myEstimator.fit(sorted_data)
145 |     myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
146 |     print(80 * '-')
147 |     print('Sample Estimated Matrix (Count Format, All Cohorts:')
148 |     myEstimator.print(select='Counts')
149 |     print(80 * '-')
150 |     print('Sample Estimated Matrix (Frequency Format, Period 3):')
151 |     myEstimator.print(select='Frequencies', period=3)
152 | 
153 | 
def main():
    """Announce on standard output that the example script has finished."""
    completion_message = "Done"
    print(completion_message)
156 | 
157 | 
158 | if __name__ == "__main__":
159 |     main()
160 | 


--------------------------------------------------------------------------------
/docs/source/data_formats.rst:
--------------------------------------------------------------------------------
  1 | Input Data Formats
  2 | ===================
  3 | 
  4 | The transitionMatrix package supports a variety of input data formats for empirical (observation) data. The two key ones are described here in more detail. More background about data formats is available in the `Open Risk Manual Risk Data Category <https://www.openriskmanual.org/wiki/Category:Risk_Data>`_.
  5 | 
  6 | 
  7 | Long Data Format
  8 | -------------------------------------------
  9 | 
 10 | Long Data Format is a tabular representation of time series data that records the states (measurements) of multiple entities. Its defining characteristic is that each table row contains data pertaining to one entity at one point in time.
 11 | 
 12 | Canonical Form of Long Data
 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 14 | 
 15 | The Long Data Format (also called Narrow or Stacked) consists of tuples, e.g. (Entity ID, Time, From State, To State), indicating the time T at which the entity with the given ID migrated from the From State to the To State.
 16 | 
 17 | The *canonical form* used as input to duration based estimators uses normalized timestamps (from 0 to T_max, where T_max is the last timepoint) and looks as follows:
 18 | 
 19 |     +----+------+------+----+
 20 |     | ID | Time | From | To |
 21 |     +----+------+------+----+
 22 |     |  1 | 1.1  |   0  | 1  |
 23 |     +----+------+------+----+
 24 |     |  1 | 2.0  |   1  | 2  |
 25 |     +----+------+------+----+
 26 |     |  1 | 3.4  |   2  | 3  |
 27 |     +----+------+------+----+
 28 |     |  1 | 4.0  |   3  | 2  |
 29 |     +----+------+------+----+
 30 |     |  2 | 1.2  |   0  | 1  |
 31 |     +----+------+------+----+
 32 |     |  2 | 2.4  |   1  | 2  |
 33 |     +----+------+------+----+
 34 |     |  2 | 3.5  |   2  | 3  |
 35 |     +----+------+------+----+
 36 | 
 37 | The canonical form has the advantage of being unambiguous about the context where the transition occurs. The meaning of each row of data stands on its own and does not rely on the order (or even the presence) of other records. This facilitates, for example, the algorithmic processing of the data. On the flip side, the format is less efficient in terms of storage (the state information occurs twice) compared to the compact format (see below).
 38 | 
 39 | The canonical format requires that the final state of all entities at the end of the observation window (Time F) is included (otherwise we have no indication about when the measurements stopped). Alternatively such information is provided as separate metadata (or implicitly, for example if measurements are understood to span a number of full annual periods).
 40 | 
 41 | .. note::
 42 | 
 43 |     Synthetic_data(7, 8, 9) in the :ref:`Datasets` collection are examples of data in long format and canonical form
 44 | 
 45 | String Dates
 46 | ~~~~~~~~~~~~~~~~
 47 | 
 48 | Transition data (e.g. from financial applications) frequently have timestamps in the form of a *date string*. For example:
 49 | 
 50 |     +----+-------------+------+----+
 51 |     | ID | Date String | From | To |
 52 |     +----+-------------+------+----+
 53 |     |  1 | 10-10-2010  | 0    | 1  |
 54 |     +----+-------------+------+----+
 55 |     |  1 | 10-11-2010  | 1    | 2  |
 56 |     +----+-------------+------+----+
 57 | 
 58 | String dates must be converted to a numerical representation before we can work with the transition data. The :func:`transitionMatrix.utils.converters.datetime_to_float` function of the :mod:`transitionMatrix.utils` subpackage can be used to convert such data into the canonical form.
 59 | 
 60 | .. note::
 61 | 
 62 |     Synthetic_data9 and rating_data in the :ref:`Datasets` collection have observation times in date string form.
 63 | 
 64 | 
 65 | Compact Form of Long Format
 66 | -------------------------------------------
 67 | 
 68 | The format uses triples (ID, Time, State), indicating the time T at which an entity ID **Left** its previous state S (the state it migrates to is encoded in the next observation of the same entity). The convention can obviously be reversed to indicate the time of entering a new state (in which case we need some information to bound the start of the observation window).
 69 | 
 70 | The compact long format avoids the duplication of data of the canonical approach but requires the presence of other records to infer the realised sequence of events.
 71 | 
 72 | The format also requires that the final state of all entities at the end of the observation window (Time F) is included as the last record (otherwise we have no indication about when the measurements stopped). Alternatively such information is provided separately (or implicitly, e.g. if measurements are understood to span a number of full annual periods).
 73 | 
 74 | 
 75 |     +----+--------+-------+
 76 |     | ID | Time   | State |
 77 |     +----+--------+-------+
 78 |     |  1 |    1.1 |     0 |
 79 |     +----+--------+-------+
 80 |     |  1 |    2.0 |     1 |
 81 |     +----+--------+-------+
 82 |     |  1 |    3.4 |     2 |
 83 |     +----+--------+-------+
 84 |     |  1 |    4.0 |     3 |
 85 |     +----+--------+-------+
 86 |     |  1 |    F   |     2 |
 87 |     +----+--------+-------+
 88 |     |  2 |    1.2 |     0 |
 89 |     +----+--------+-------+
 90 |     |  2 |    2.4 |     1 |
 91 |     +----+--------+-------+
 92 |     |  2 |    3.5 |     2 |
 93 |     +----+--------+-------+
 94 |     |  2 |    F   | 3     |
 95 |     +----+--------+-------+
 96 | 
 97 | Wide Data Format
 98 | ------------------
 99 | 
100 | Wide Data Format is an alternative tabular representation of time series data that records the states (measurements) of multiple entities. Its defining characteristic is that each table row contains *all the data* pertaining to any one entity. The measurement times are not arbitrary but encoded in the column labels:
101 | 
102 |     +----+--------+-------+-------+
103 |     | ID |   2011 |  2012 |  2013 |
104 |     +----+--------+-------+-------+
105 |     | A1 |      1 |    0  |    1  |
106 |     +----+--------+-------+-------+
107 |     | A2 |      2 |    1  |    3  |
108 |     +----+--------+-------+-------+
109 |     | A3 |      0 |    1  |    2  |
110 |     +----+--------+-------+-------+
111 | 
112 | Conversion from wide to long formats can be handled using the `pandas wide_to_long method
113 | <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.wide_to_long.html>`_.
114 | 
115 | 
116 | (This conversion will be integrated more closely into the package in the future.)
117 | 
118 | 
119 | Other Formats
120 | -------------------------------------------
121 | 
122 | As mentioned, a design choice is that data ingestion of transitionMatrix is via a pandas dataframe, so other formats can be handled with additional code by the user. If there is a format that you repeatedly encounter, submit an issue with your desired format / transformation `suggestion <https://github.com/open-risk/transitionMatrix/issues>`_.


--------------------------------------------------------------------------------
/datasets/scenario_data.csv:
--------------------------------------------------------------------------------
  1 | ID,Time,State
  2 | 0,0,3
  3 | 1,0,3
  4 | 2,0,3
  5 | 3,0,3
  6 | 4,0,3
  7 | 5,0,3
  8 | 6,0,3
  9 | 7,0,3
 10 | 8,0,3
 11 | 9,0,3
 12 | 10,0,3
 13 | 11,0,3
 14 | 12,0,3
 15 | 13,0,3
 16 | 14,0,3
 17 | 15,0,3
 18 | 16,0,3
 19 | 17,0,3
 20 | 18,0,3
 21 | 19,0,3
 22 | 20,0,3
 23 | 21,0,3
 24 | 22,0,3
 25 | 23,0,3
 26 | 24,0,3
 27 | 25,0,3
 28 | 26,0,3
 29 | 27,0,3
 30 | 28,0,3
 31 | 29,0,3
 32 | 30,0,3
 33 | 31,0,3
 34 | 32,0,3
 35 | 33,0,3
 36 | 34,0,3
 37 | 35,0,3
 38 | 36,0,3
 39 | 37,0,3
 40 | 38,0,3
 41 | 39,0,3
 42 | 40,0,3
 43 | 41,0,3
 44 | 42,0,3
 45 | 43,0,3
 46 | 44,0,3
 47 | 45,0,3
 48 | 46,0,3
 49 | 47,0,3
 50 | 48,0,3
 51 | 49,0,3
 52 | 0,1,4
 53 | 1,1,4
 54 | 2,1,3
 55 | 3,1,4
 56 | 4,1,3
 57 | 5,1,3
 58 | 6,1,3
 59 | 7,1,3
 60 | 8,1,3
 61 | 9,1,3
 62 | 10,1,3
 63 | 11,1,3
 64 | 12,1,5
 65 | 13,1,4
 66 | 14,1,3
 67 | 15,1,3
 68 | 16,1,7
 69 | 17,1,3
 70 | 18,1,3
 71 | 19,1,3
 72 | 20,1,4
 73 | 21,1,3
 74 | 22,1,5
 75 | 23,1,3
 76 | 24,1,3
 77 | 25,1,4
 78 | 26,1,4
 79 | 27,1,3
 80 | 28,1,3
 81 | 29,1,3
 82 | 30,1,4
 83 | 31,1,3
 84 | 32,1,3
 85 | 33,1,3
 86 | 34,1,3
 87 | 35,1,4
 88 | 36,1,3
 89 | 37,1,4
 90 | 38,1,3
 91 | 39,1,3
 92 | 40,1,4
 93 | 41,1,3
 94 | 42,1,3
 95 | 43,1,4
 96 | 44,1,4
 97 | 45,1,4
 98 | 46,1,3
 99 | 47,1,3
100 | 48,1,3
101 | 49,1,3
102 | 0,2,4
103 | 1,2,4
104 | 2,2,3
105 | 3,2,5
106 | 4,2,3
107 | 5,2,3
108 | 6,2,3
109 | 7,2,3
110 | 8,2,4
111 | 9,2,3
112 | 10,2,3
113 | 11,2,3
114 | 12,2,4
115 | 13,2,4
116 | 14,2,4
117 | 15,2,3
118 | 16,2,7
119 | 17,2,3
120 | 18,2,3
121 | 19,2,3
122 | 20,2,4
123 | 21,2,3
124 | 22,2,4
125 | 23,2,3
126 | 24,2,4
127 | 25,2,5
128 | 26,2,5
129 | 27,2,3
130 | 28,2,3
131 | 29,2,3
132 | 30,2,5
133 | 31,2,3
134 | 32,2,3
135 | 33,2,3
136 | 34,2,3
137 | 35,2,4
138 | 36,2,4
139 | 37,2,4
140 | 38,2,3
141 | 39,2,3
142 | 40,2,4
143 | 41,2,3
144 | 42,2,4
145 | 43,2,5
146 | 44,2,4
147 | 45,2,4
148 | 46,2,3
149 | 47,2,3
150 | 48,2,4
151 | 49,2,4
152 | 0,3,4
153 | 1,3,4
154 | 2,3,3
155 | 3,3,5
156 | 4,3,4
157 | 5,3,3
158 | 6,3,3
159 | 7,3,3
160 | 8,3,5
161 | 9,3,3
162 | 10,3,3
163 | 11,3,3
164 | 12,3,5
165 | 13,3,4
166 | 14,3,4
167 | 15,3,3
168 | 16,3,7
169 | 17,3,3
170 | 18,3,3
171 | 19,3,4
172 | 20,3,5
173 | 21,3,4
174 | 22,3,4
175 | 23,3,4
176 | 24,3,4
177 | 25,3,5
178 | 26,3,5
179 | 27,3,3
180 | 28,3,3
181 | 29,3,3
182 | 30,3,5
183 | 31,3,4
184 | 32,3,3
185 | 33,3,3
186 | 34,3,4
187 | 35,3,4
188 | 36,3,5
189 | 37,3,4
190 | 38,3,3
191 | 39,3,3
192 | 40,3,5
193 | 41,3,3
194 | 42,3,4
195 | 43,3,6
196 | 44,3,4
197 | 45,3,5
198 | 46,3,3
199 | 47,3,3
200 | 48,3,4
201 | 49,3,5
202 | 0,4,4
203 | 1,4,4
204 | 2,4,3
205 | 3,4,5
206 | 4,4,4
207 | 5,4,3
208 | 6,4,3
209 | 7,4,4
210 | 8,4,5
211 | 9,4,4
212 | 10,4,3
213 | 11,4,3
214 | 12,4,5
215 | 13,4,4
216 | 14,4,4
217 | 15,4,3
218 | 16,4,7
219 | 17,4,4
220 | 18,4,4
221 | 19,4,4
222 | 20,4,6
223 | 21,4,4
224 | 22,4,5
225 | 23,4,4
226 | 24,4,4
227 | 25,4,5
228 | 26,4,4
229 | 27,4,3
230 | 28,4,3
231 | 29,4,3
232 | 30,4,7
233 | 31,4,4
234 | 32,4,4
235 | 33,4,3
236 | 34,4,4
237 | 35,4,4
238 | 36,4,6
239 | 37,4,4
240 | 38,4,4
241 | 39,4,3
242 | 40,4,5
243 | 41,4,3
244 | 42,4,4
245 | 43,4,7
246 | 44,4,4
247 | 45,4,5
248 | 46,4,3
249 | 47,4,3
250 | 48,4,5
251 | 49,4,5
252 | 0,5,5
253 | 1,5,4
254 | 2,5,3
255 | 3,5,6
256 | 4,5,4
257 | 5,5,3
258 | 6,5,4
259 | 7,5,4
260 | 8,5,5
261 | 9,5,4
262 | 10,5,3
263 | 11,5,4
264 | 12,5,5
265 | 13,5,4
266 | 14,5,4
267 | 15,5,4
268 | 16,5,7
269 | 17,5,4
270 | 18,5,4
271 | 19,5,5
272 | 20,5,4
273 | 21,5,4
274 | 22,5,5
275 | 23,5,4
276 | 24,5,4
277 | 25,5,5
278 | 26,5,5
279 | 27,5,3
280 | 28,5,3
281 | 29,5,4
282 | 30,5,7
283 | 31,5,4
284 | 32,5,4
285 | 33,5,3
286 | 34,5,4
287 | 35,5,4
288 | 36,5,6
289 | 37,5,5
290 | 38,5,4
291 | 39,5,3
292 | 40,5,5
293 | 41,5,3
294 | 42,5,4
295 | 43,5,7
296 | 44,5,4
297 | 45,5,6
298 | 46,5,3
299 | 47,5,3
300 | 48,5,5
301 | 49,5,5
302 | 0,6,5
303 | 1,6,4
304 | 2,6,3
305 | 3,6,7
306 | 4,6,5
307 | 5,6,3
308 | 6,6,4
309 | 7,6,4
310 | 8,6,5
311 | 9,6,4
312 | 10,6,3
313 | 11,6,4
314 | 12,6,7
315 | 13,6,4
316 | 14,6,4
317 | 15,6,4
318 | 16,6,7
319 | 17,6,4
320 | 18,6,4
321 | 19,6,5
322 | 20,6,5
323 | 21,6,4
324 | 22,6,4
325 | 23,6,4
326 | 24,6,4
327 | 25,6,6
328 | 26,6,5
329 | 27,6,3
330 | 28,6,3
331 | 29,6,4
332 | 30,6,7
333 | 31,6,4
334 | 32,6,4
335 | 33,6,3
336 | 34,6,4
337 | 35,6,4
338 | 36,6,7
339 | 37,6,5
340 | 38,6,4
341 | 39,6,4
342 | 40,6,4
343 | 41,6,3
344 | 42,6,4
345 | 43,6,7
346 | 44,6,5
347 | 45,6,5
348 | 46,6,3
349 | 47,6,3
350 | 48,6,4
351 | 49,6,5
352 | 0,7,4
353 | 1,7,4
354 | 2,7,4
355 | 3,7,7
356 | 4,7,4
357 | 5,7,3
358 | 6,7,4
359 | 7,7,4
360 | 8,7,5
361 | 9,7,4
362 | 10,7,3
363 | 11,7,4
364 | 12,7,7
365 | 13,7,4
366 | 14,7,4
367 | 15,7,4
368 | 16,7,7
369 | 17,7,4
370 | 18,7,4
371 | 19,7,5
372 | 20,7,6
373 | 21,7,4
374 | 22,7,4
375 | 23,7,5
376 | 24,7,4
377 | 25,7,6
378 | 26,7,5
379 | 27,7,3
380 | 28,7,3
381 | 29,7,4
382 | 30,7,7
383 | 31,7,4
384 | 32,7,4
385 | 33,7,3
386 | 34,7,4
387 | 35,7,4
388 | 36,7,7
389 | 37,7,5
390 | 38,7,4
391 | 39,7,4
392 | 40,7,4
393 | 41,7,3
394 | 42,7,4
395 | 43,7,7
396 | 44,7,4
397 | 45,7,5
398 | 46,7,3
399 | 47,7,3
400 | 48,7,7
401 | 49,7,5
402 | 0,8,4
403 | 1,8,4
404 | 2,8,4
405 | 3,8,7
406 | 4,8,5
407 | 5,8,3
408 | 6,8,4
409 | 7,8,4
410 | 8,8,7
411 | 9,8,4
412 | 10,8,3
413 | 11,8,4
414 | 12,8,7
415 | 13,8,5
416 | 14,8,4
417 | 15,8,4
418 | 16,8,7
419 | 17,8,4
420 | 18,8,4
421 | 19,8,5
422 | 20,8,7
423 | 21,8,5
424 | 22,8,4
425 | 23,8,5
426 | 24,8,4
427 | 25,8,6
428 | 26,8,5
429 | 27,8,3
430 | 28,8,3
431 | 29,8,4
432 | 30,8,7
433 | 31,8,5
434 | 32,8,4
435 | 33,8,4
436 | 34,8,5
437 | 35,8,4
438 | 36,8,7
439 | 37,8,5
440 | 38,8,4
441 | 39,8,4
442 | 40,8,4
443 | 41,8,3
444 | 42,8,4
445 | 43,8,7
446 | 44,8,5
447 | 45,8,5
448 | 46,8,3
449 | 47,8,3
450 | 48,8,7
451 | 49,8,5
452 | 0,9,4
453 | 1,9,4
454 | 2,9,4
455 | 3,9,7
456 | 4,9,5
457 | 5,9,3
458 | 6,9,4
459 | 7,9,4
460 | 8,9,7
461 | 9,9,4
462 | 10,9,3
463 | 11,9,4
464 | 12,9,7
465 | 13,9,5
466 | 14,9,4
467 | 15,9,5
468 | 16,9,7
469 | 17,9,4
470 | 18,9,4
471 | 19,9,6
472 | 20,9,7
473 | 21,9,5
474 | 22,9,4
475 | 23,9,5
476 | 24,9,4
477 | 25,9,5
478 | 26,9,5
479 | 27,9,3
480 | 28,9,3
481 | 29,9,4
482 | 30,9,7
483 | 31,9,4
484 | 32,9,5
485 | 33,9,4
486 | 34,9,6
487 | 35,9,4
488 | 36,9,7
489 | 37,9,6
490 | 38,9,4
491 | 39,9,5
492 | 40,9,4
493 | 41,9,3
494 | 42,9,4
495 | 43,9,7
496 | 44,9,4
497 | 45,9,5
498 | 46,9,3
499 | 47,9,3
500 | 48,9,7
501 | 49,9,5
502 | 0,10,5
503 | 1,10,4
504 | 2,10,4
505 | 3,10,7
506 | 4,10,7
507 | 5,10,3
508 | 6,10,4
509 | 7,10,4
510 | 8,10,7
511 | 9,10,4
512 | 10,10,3
513 | 11,10,4
514 | 12,10,7
515 | 13,10,7
516 | 14,10,4
517 | 15,10,5
518 | 16,10,7
519 | 17,10,4
520 | 18,10,4
521 | 19,10,6
522 | 20,10,7
523 | 21,10,5
524 | 22,10,4
525 | 23,10,5
526 | 24,10,4
527 | 25,10,5
528 | 26,10,5
529 | 27,10,4
530 | 28,10,4
531 | 29,10,4
532 | 30,10,7
533 | 31,10,4
534 | 32,10,4
535 | 33,10,4
536 | 34,10,5
537 | 35,10,4
538 | 36,10,7
539 | 37,10,7
540 | 38,10,4
541 | 39,10,5
542 | 40,10,5
543 | 41,10,4
544 | 42,10,4
545 | 43,10,7
546 | 44,10,5
547 | 45,10,7
548 | 46,10,3
549 | 47,10,3
550 | 48,10,7
551 | 49,10,5
552 | 


--------------------------------------------------------------------------------
/transitionMatrix/creditratings/creditcurve.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
  4 | #
  5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
  6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
  7 | # third-party software included in this distribution. You may not use this file except in
  8 | # compliance with the License.
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software distributed under
 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 12 | # either express or implied. See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """ This module provides objects related to credit curves
 16 | 
 17 | * CreditCurve_ implements the functionality of a collection of credit (default curves)
 18 | 
 19 | """
 20 | 
 21 | import numpy as np
 22 | import pandas as pd
 23 | 
 24 | 
 25 | class CreditCurve(np.matrix):
 26 |     """ The _`CreditCurve` object implements a typical collection of `credit curves <https://www.openriskmanual.org/wiki/Credit_Curve>`_.
 27 |     The class inherits from numpy matrices and implements additional properties specific to curves.
 28 | 
 29 |     """
 30 | 
 31 |     def __new__(cls, values=None, json_file=None, csv_file=None):
 32 |         """ Create a new credit curve set. Different options for initialization are:
 33 | 
 34 |         * providing values as a list of list
 35 |         * providing values as a numpy array  (The rows are the different curves, the columns are different periods)
 36 |         * loading from a csv file
 37 |         * loading from a json file
 38 | 
 39 |         Without data, a default identity matrix is generated with user specified dimension
 40 | 
 41 |         :param values: initialization values
 42 |         :param json_file: a json file containing transition matrix data
 43 |         :param csv_file: a csv file containing transition matrix data
 44 |         :type values: list of lists or numpy array
 45 |         :returns: returns a CreditCurve object
 46 |         :rtype: object
 47 | 
 48 |         .. note:: The initialization in itself does not validate if the provided values form indeed a credit curve
 49 | 
 50 |         :Example:
 51 | 
 52 |         .. code-block:: python
 53 | 
 54 |             A = tm.CreditCurve(values=[[0.1, 0.2, 0.3], [0.2, 0.6, 0.8], [0.01, 0.02, 0.06]])
 55 | 
 56 |         """
 57 |         obj = None
 58 |         if values is not None:
 59 |             # Initialize with given values
 60 |             obj = np.asarray(values).view(cls)
 61 |         elif json_file is not None:
 62 |             # Initialize from file in json format
 63 |             q = pd.read_json(json_file)
 64 |             obj = np.asarray(q.values).view(cls)
 65 |         elif csv_file is not None:
 66 |             # Initialize from file in csv format
 67 |             q = pd.read_csv(csv_file, index_col=None)
 68 |             obj = np.asarray(q.values).view(cls)
 69 |         # validation flag is set to False at initialization
 70 |         obj.validated = False
 71 |         # temporary dimension assignment (must validated for squareness)
 72 |         obj.dimension = obj.shape[0]
 73 |         return obj
 74 | 
 75 |     def to_json(self, file):
 76 |         """
 77 |         Write credit curves to file in json format
 78 | 
 79 |         :param file: json filename
 80 |         """
 81 | 
 82 |         q = pd.DataFrame(self)
 83 |         q.to_json(file, orient='values')
 84 | 
 85 |     def to_csv(self, file):
 86 |         """
 87 |         Write credit curves to file in csv format
 88 | 
 89 |         :param file: csv filename
 90 |         """
 91 | 
 92 |         q = pd.DataFrame(self)
 93 |         q.to_csv(file, index=False)
 94 | 
 95 |     def to_html(self, file=None):
 96 |         html_table = pd.DataFrame(self).to_html()
 97 |         if file is not None:
 98 |             file = open(file, 'w')
 99 |             file.write(html_table)
100 |             file.close()
101 |         return html_table
102 | 
103 |     def validate(self, accuracy=1e-3):
104 |         """ Validate required properties of a credit curve set. The following are checked
105 | 
106 |         1. check that all values are probabilities (between 0 and 1)
107 |         2. check that values are non-decreasing
108 | 
109 |         :param accuracy: accuracy level to use for validation
110 |         :type accuracy: float
111 | 
112 |         :returns: List of tuples with validation messages
113 |         """
114 |         validation_messages = []
115 | 
116 |         curve_set = self
117 |         curve_set_size = curve_set.shape[0]
118 |         curve_set_periods = curve_set.shape[1]
119 | 
120 |         # checking that values of curve_set are within allowed range
121 |         for i in range(curve_set_size):
122 |             for j in range(curve_set_periods):
123 |                 if curve_set[i, j] < 0:
124 |                     validation_messages.append(("Negative Probabilities: ", (i, j, curve_set[i, j])))
125 |                 if curve_set[i, j] > 1:
126 |                     validation_messages.append(("Probabilities Larger than 1: ", (i, j, curve_set[i, j])))
127 |         # checking monotonicity
128 |         for i in range(curve_set_size):
129 |             for j in range(1, curve_set_periods):
130 |                 if curve_set[i, j] < curve_set[i, j - 1]:
131 |                     validation_messages.append(("Curve not monotonic: ", (i, j)))
132 | 
133 |         if len(validation_messages) == 0:
134 |             self.validated = True
135 |             return self.validated
136 |         else:
137 |             self.validated = False
138 |             return validation_messages
139 | 
140 |     def hazard_curve(self):
141 |         """ Compute hazard rates
142 | 
143 |         .. Todo:: Compute hazard rates
144 | 
145 |         :return: TODO
146 | 
147 |         """
148 |         pass
149 | 
150 |     def characterize(self):
151 |         """ Analyse or classify a credit curve according to its properties
152 | 
153 |         * slope of hazard rate
154 | 
155 |         .. Todo:: Further characterization
156 | 
157 |         """
158 | 
159 |         pass
160 | 
161 |     def print_curve(self, format_type='Standard', accuracy=2):
162 |         """ Pretty print a set of credit curves
163 | 
164 |         :param format_type: formatting options (Standard, Percent)
165 |         :type format_type: str
166 |         :param accuracy: number of decimals to display
167 |         :type accuracy: int
168 | 
169 |         """
170 |         for s_in in range(self.shape[0]):
171 |             for s_out in range(self.shape[1]):
172 |                 if format_type == 'Standard':
173 |                     format_string = "{0:." + str(accuracy) + "f}"
174 |                     print(format_string.format(self[s_in, s_out]) + ' ', end='')
175 |                 elif format_type == 'Percent':
176 |                     print("{0:.2f}%".format(100 * self[s_in, s_out]) + ' ', end='')
177 |             print('')
178 |         print('')
179 | 


--------------------------------------------------------------------------------