├── tests ├── __init__.py ├── test.json ├── test.csv ├── test_datasets.py ├── test_utils.py ├── test_duration_estimator.py ├── test_nr_transform.py ├── test_state_space.py ├── test_roundtrip.py ├── test_cohort_estimator.py └── test_model.py ├── examples ├── __init__.py ├── python │ ├── __init__.py │ ├── README.md │ ├── characterize_datasets.py │ ├── deterministic_paths.py │ ├── state_space_operations.py │ ├── compare_estimators.py │ ├── adjust_nr_state.py │ ├── matrix_set_operations.py │ ├── matrix_set_lendingclub.py │ ├── fix_multiperiod_matrix.py │ ├── estimate_matrix.py │ ├── matrix_lendingclub.py │ ├── credit_curves.py │ ├── example_list.csv │ ├── generate_full_multiperiod_set.py │ ├── matrix_operations.py │ ├── empirical_transition_matrix.py │ ├── matrix_from_duration_data.py │ ├── data_cleaning_example.py │ └── matrix_from_cohort_data.py ├── sankey.png ├── estimation.png ├── overview.png ├── scatterplot.png ├── scatterplot2.png ├── credit_curves.png ├── single_entity.png ├── TransitionMatrix.png ├── sampled_histories.png ├── scale_conversions.png ├── monthly_credit_curves.png ├── transition_probabilities.png ├── JLT.json └── JLT.csv ├── test.json ├── test.csv ├── transitionMatrix ├── generators │ └── __init__.py ├── statespaces │ └── __init__.py ├── estimators │ ├── kaplan_meier_estimator.py │ ├── simple_estimator.py │ └── __init__.py ├── utils │ ├── __init__.py │ └── converters.py └── creditratings │ └── creditcurve.py ├── docs ├── source │ ├── changelog.rst │ ├── _static │ │ ├── Architecture_Overview.png │ │ └── custom.css │ ├── simple_estimator.rst │ ├── modules.rst │ ├── transitionMatrix.generators.rst │ ├── aalen-johansen_estimator.rst │ ├── transitionMatrix.statespaces.rst │ ├── transitionMatrix.visualization.rst │ ├── preprocessing.rst │ ├── credit_ratings.rst │ ├── transitionMatrix.utils.rst │ ├── postprocessing.rst │ ├── state_spaces.rst │ ├── federation.rst │ ├── transitionMatrix.creditratings.rst │ ├── data_generators.rst │ ├── 
withdrawn_ratings.rst │ ├── datasets.rst │ ├── credit_curves.rst │ ├── index.rst │ ├── transitionMatrix.rst │ ├── example_with_jlt.rst │ ├── cohort_estimator.rst │ ├── transitionMatrix.estimators.rst │ ├── multi-period_transitions.rst │ ├── examples.rst │ ├── visualization.rst │ ├── testing.rst │ ├── predefined_rating_scales.rst │ ├── estimators.rst │ ├── cohorts.rst │ ├── basic_operations.rst │ ├── roadmap.rst │ ├── description.rst │ └── data_formats.rst └── Makefile ├── .github └── FUNDING.yml ├── requirements.txt ├── datasets ├── nr_test_case.xlsx ├── test.csv ├── JLT.csv ├── JLT.json ├── dataset_list.csv ├── synthetic_data1.csv ├── synthetic_data.csv ├── sp_1981-2016.csv ├── sp 2017.csv └── scenario_data.csv ├── justfile ├── test_upload.sh ├── Makefile ├── MANIFEST.in ├── setup.cfg ├── requirements-dev.txt ├── test.py ├── .readthedocs.yaml ├── run_examples.py ├── .gitignore ├── setup.py ├── README.md └── CHANGELOG.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test.json: -------------------------------------------------------------------------------- 1 | [[1.0,0.0],[0.0,1.0]] -------------------------------------------------------------------------------- /tests/test.json: -------------------------------------------------------------------------------- 1 | [[1.0,0.0],[0.0,1.0]] -------------------------------------------------------------------------------- /test.csv: -------------------------------------------------------------------------------- 1 | 0,1 2 | 1.0,0.0 3 | 0.0,1.0 4 | -------------------------------------------------------------------------------- /transitionMatrix/generators/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transitionMatrix/statespaces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test.csv: -------------------------------------------------------------------------------- 1 | 0,1 2 | 1.0,0.0 3 | 0.0,1.0 4 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CHANGELOG.rst -------------------------------------------------------------------------------- /examples/python/__init__.py: -------------------------------------------------------------------------------- 1 | # init funny all those empty inits 2 | 3 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [open-risk] 4 | 5 | -------------------------------------------------------------------------------- /examples/sankey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/sankey.png -------------------------------------------------------------------------------- /examples/estimation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/estimation.png -------------------------------------------------------------------------------- /examples/overview.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/overview.png -------------------------------------------------------------------------------- /examples/scatterplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/scatterplot.png -------------------------------------------------------------------------------- /examples/scatterplot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/scatterplot2.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2 2 | matplotlib 3 | numpy 4 | pandas 5 | Pillow 6 | requests 7 | scipy 8 | statsmodels 9 | -------------------------------------------------------------------------------- /datasets/nr_test_case.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/datasets/nr_test_case.xlsx -------------------------------------------------------------------------------- /examples/credit_curves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/credit_curves.png -------------------------------------------------------------------------------- /examples/single_entity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/single_entity.png -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | docs: 2 | sphinx-build docs/source 
docs/build/html 3 | 4 | show: 5 | start docs/build/html/index.html -------------------------------------------------------------------------------- /examples/TransitionMatrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/TransitionMatrix.png -------------------------------------------------------------------------------- /examples/sampled_histories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/sampled_histories.png -------------------------------------------------------------------------------- /examples/scale_conversions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/scale_conversions.png -------------------------------------------------------------------------------- /examples/monthly_credit_curves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/monthly_credit_curves.png -------------------------------------------------------------------------------- /examples/transition_probabilities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/examples/transition_probabilities.png -------------------------------------------------------------------------------- /docs/source/_static/Architecture_Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-risk/transitionMatrix/HEAD/docs/source/_static/Architecture_Overview.png -------------------------------------------------------------------------------- /test_upload.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | rm -rf dist/* 3 | python3 setup.py sdist bdist_egg 4 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 5 | -------------------------------------------------------------------------------- /datasets/test.csv: -------------------------------------------------------------------------------- 1 | ID,Time,State 2 | 1,4,1 3 | 2,3,1 4 | 3,3,0 5 | 4,4,1 6 | 5,2,1 7 | 6,0,1 8 | 7,2,1 9 | 1,14,2 10 | 2,13,2 11 | 3,13,1 12 | 4,14,2 13 | 5,12,2 14 | 6,10,2 15 | 7,12,2 -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | autopep8: 2 | autopep8 --ignore E501,E241,W690 --in-place --recursive --aggressive transitionMatrix/ 3 | 4 | lint: 5 | flake8 transitionMatrix 6 | 7 | autolint: autopep8 lint 8 | 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include examples * 2 | recursive-include datasets * 3 | recursive-include tests * 4 | include datasets/*.csv 5 | include datasets/*.json 6 | include MANIFEST.in 7 | include LICENSE.txt 8 | include description.rst -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE.txt 3 | 4 | [sdist] 5 | formats = gztar 6 | 7 | [bdist_wheel] 8 | universal=1 9 | 10 | [tool:pytest] 11 | norecursedirs = .* docs build docs examples wheel 12 | testpaths = tests -------------------------------------------------------------------------------- /docs/source/simple_estimator.rst: -------------------------------------------------------------------------------- 1 | Simple Estimator 2 | ======================== 3 | 4 
| The estimation of a transition matrix is one of the core functionalities of transitionMatrix. The two main estimators currently implemented are: 5 | 6 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | API 2 | ============================== 3 | 4 | The transitionMatrix package structure and API. 5 | 6 | .. warning:: The library is still being expanded / refactored. Significant structure and API changes are likely. 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | transitionMatrix -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | Jinja2 2 | matplotlib 3 | numpy 4 | pandas 5 | Pillow 6 | requests 7 | scipy 8 | statsmodels 9 | Sphinx 10 | sphinx-rtd-theme 11 | sphinxcontrib-applehelp 12 | sphinxcontrib-devhelp 13 | sphinxcontrib-htmlhelp 14 | sphinxcontrib-jsmath 15 | sphinxcontrib-qthelp 16 | sphinxcontrib-serializinghtml 17 | pytest -------------------------------------------------------------------------------- /docs/source/transitionMatrix.generators.rst: -------------------------------------------------------------------------------- 1 | Generators SubPackage 2 | ============================ 3 | 4 | This subpackage implements test data generation 5 | 6 | transitionMatrix.generators contents 7 | --------------------------------------------- 8 | 9 | .. 
automodule:: transitionMatrix.generators.dataset_generators 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | -------------------------------------------------------------------------------- /examples/JLT.json: -------------------------------------------------------------------------------- 1 | [[0.891,0.0963,0.0078,0.0019,0.003,0.0,0.0,0.0],[0.0086,0.901,0.0747,0.0099,0.0029,0.0029,0.0,0.0],[0.0009,0.0291,0.8894,0.0649,0.0101,0.0045,0.0,0.0009],[0.0006,0.0043,0.0656,0.8427,0.0644,0.016,0.0018,0.0045],[0.0004,0.0022,0.0079,0.0719,0.7764,0.1043,0.0127,0.0241],[0.0,0.0019,0.0031,0.0066,0.0517,0.8246,0.0435,0.0685],[0.0,0.0,0.0116,0.0116,0.0203,0.0754,0.6493,0.2319],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0]] -------------------------------------------------------------------------------- /docs/source/aalen-johansen_estimator.rst: -------------------------------------------------------------------------------- 1 | Aalen-Johansen Estimator 2 | ======================== 3 | 4 | The Aalen-Johansen estimator is a multi-state (matrix) version of the Kaplan–Meier estimator for the hazard of a survival process. The estimator can be used to estimate the transition probability matrix of a Markov process with a finite number of states. `See `_ 5 | -------------------------------------------------------------------------------- /docs/source/transitionMatrix.statespaces.rst: -------------------------------------------------------------------------------- 1 | State Spaces SubPackage 2 | ============================ 3 | 4 | This subpackage implements state space functionality 5 | 6 | 7 | transitionMatrix.statespaces.statespace module 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 9 | 10 | .. 
automodule:: transitionMatrix.statespaces.statespace 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* override table width restrictions */ 2 | @media screen and (min-width: 767px) { 3 | 4 | .wy-table-responsive table td { 5 | /* !important prevents the common CSS stylesheets from 6 | overriding this as on RTD they are loaded after this stylesheet */ 7 | white-space: normal !important; 8 | } 9 | 10 | .wy-table-responsive { 11 | overflow: visible !important; 12 | } 13 | 14 | } -------------------------------------------------------------------------------- /docs/source/transitionMatrix.visualization.rst: -------------------------------------------------------------------------------- 1 | Visualization subpackage 2 | ============================ 3 | 4 | This subpackage implements visualization functionality 5 | 6 | .. warning:: not yet implemented 7 | 8 | transitionMatrix.visualization contents 9 | --------------------------------------------- 10 | 11 | .. 
automodule:: transitionMatrix.visualization 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | -------------------------------------------------------------------------------- /datasets/JLT.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7 2 | 0.891,0.0963,0.0078,0.0019,0.003,0.0,0.0,0.0 3 | 0.0086,0.901,0.0747,0.0099,0.0029,0.0029,0.0,0.0 4 | 0.0009,0.0291,0.8894,0.0649,0.0101,0.0045,0.0,0.0009 5 | 0.0006,0.0043,0.0656,0.8427,0.0644,0.016,0.0018,0.0045 6 | 0.0004,0.0022,0.0079,0.0719,0.7764,0.1043,0.0127,0.0241 7 | 0.0,0.0019,0.0031,0.0066,0.0517,0.8246,0.0435,0.0685 8 | 0.0,0.0,0.0116,0.0116,0.0203,0.0754,0.6493,0.2319 9 | 0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 10 | -------------------------------------------------------------------------------- /examples/JLT.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7 2 | 0.891,0.0963,0.0078,0.0019,0.003,0.0,0.0,0.0 3 | 0.0086,0.901,0.0747,0.0099,0.0029,0.0029,0.0,0.0 4 | 0.0009,0.0291,0.8894,0.0649,0.0101,0.0045,0.0,0.0009 5 | 0.0006,0.0043,0.0656,0.8427,0.0644,0.016,0.0018,0.0045 6 | 0.0004,0.0022,0.0079,0.0719,0.7764,0.1043,0.0127,0.0241 7 | 0.0,0.0019,0.0031,0.0066,0.0517,0.8246,0.0435,0.0685 8 | 0.0,0.0,0.0116,0.0116,0.0203,0.0754,0.6493,0.2319 9 | 0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 10 | -------------------------------------------------------------------------------- /docs/source/preprocessing.rst: -------------------------------------------------------------------------------- 1 | Preprocessing 2 | =================== 3 | 4 | The preprocessing stage includes preparatory steps leading up to the matrix :ref:`Estimation` to produce a transition matrix (or matrix set). 5 | 6 | The precise steps required depend on the sources of data, the nature of data, use specific requirements (best practices, regulation etc) and, not least, the desired estimation method. 7 | 8 | .. 
toctree:: 9 | :maxdepth: 2 10 | 11 | data_formats 12 | state_spaces 13 | cohorts -------------------------------------------------------------------------------- /docs/source/credit_ratings.rst: -------------------------------------------------------------------------------- 1 | Credit Ratings 2 | ====================== 3 | 4 | Working with credit data is a core use case of transitionMatrix. Functionality that is specific to credit ratings is generally grouped in the **credit ratings** subpackage (although the distinction of what is generic and what credit specific is not always clear). 5 | 6 | The following sections document various credit rating related activities. General documentation about `credit rating systems `_ 7 | 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | predefined_rating_scales 14 | withdrawn_ratings 15 | credit_curves 16 | -------------------------------------------------------------------------------- /docs/source/transitionMatrix.utils.rst: -------------------------------------------------------------------------------- 1 | Utilities SubPackage 2 | ================================= 3 | 4 | This subpackage collects various utilities 5 | 6 | transitionMatrix.utils.converters module 7 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | .. automodule:: transitionMatrix.utils.converters 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | 15 | transitionMatrix.utils.preprocessing module 16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 17 | 18 | .. automodule:: transitionMatrix.utils.preprocessing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = transitionMatrix 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/postprocessing.rst: -------------------------------------------------------------------------------- 1 | Post-processing 2 | =================== 3 | 4 | The post-processing stage includes steps and activities after the estimation of a transition matrix. The precise steps required depend on specific circumstances but might involve some of the following: 5 | 6 | 7 | * "Fixing" a matrix by correcting deficiencies linked to data quality 8 | * Obtaining the infinitesimal generator of a matrix, a powerful tool for further analysis 9 | * Working with multi-period matrices 10 | * Visualizing transition datasets and transition frequencies 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | basic_operations 16 | example_with_jlt 17 | multi-period_transitions 18 | visualization 19 | -------------------------------------------------------------------------------- /docs/source/state_spaces.rst: -------------------------------------------------------------------------------- 1 | State Spaces 2 | ============================== 3 | 4 | A State Space is a fundamental concept in probability theory and computer science representing the possible configurations for a modelled system 5 | 6 | The StateSpace object stores a state space structure as a List of tuples. 
The first two elements of each tuple contain the index (base-0) and label of the state space respectively. 7 | 8 | Additional fields are reserved for further characterisation 9 | 10 | 11 | Example: Map credit ratings between systems 12 | """""""""""""""""""""""""""""""""""""""""""" 13 | 14 | * Script: state_space_operations.py 15 | 16 | Example workflows for converting data from one credit rating system to another using an established mapping table 17 | 18 | -------------------------------------------------------------------------------- /docs/source/federation.rst: -------------------------------------------------------------------------------- 1 | Federation 2 | ====================== 3 | 4 | Credit Rating Ontology 5 | ---------------------- 6 | 7 | The Credit Ratings Ontology is a framework that aims to represent and categorize knowledge about Credit Rating Agencies and related data (Credit Ratings) using semantic web information technologies. 8 | 9 | This is a new project, related resources can be found here: 10 | 11 | * `Online documentation `_ 12 | * `Blog post `_ 13 | * `Course `_ 14 | * `Repo with ontology usage examples `_ 15 | 16 | .. 
note:: transitionMatrix functionality to federate semantically annotated credit data is planned -------------------------------------------------------------------------------- /examples/python/README.md: -------------------------------------------------------------------------------- 1 | Example Scripts 2 | =============== 3 | 4 | - [adjust_nr_state.py](adjust_nr_state.py) 5 | - [credit_curves.py](credit_curves.py) 6 | - [empirical_transition_matrix.py](empirical_transition_matrix.py) 7 | - [fix_multiperiod_matrix.py](fix_multiperiod_matrix.py) 8 | - [generate_full_multiperiod_set.py](generate_full_multiperiod_set.py) 9 | - [generate_synthetic_data.py](generate_synthetic_data.py) 10 | - [generate_visuals.py](generate_visuals.py) 11 | - [matrix_from_cohort_data.py](matrix_from_cohort_data.py) 12 | - [matrix_from_duration_data.py](matrix_from_duration_data.py) 13 | - [matrix_lendingclub.py](matrix_lendingclub.py) 14 | - [matrix_operations.py](matrix_operations.py) 15 | - [matrix_set_lendingclub.py](matrix_set_lendingclub.py) 16 | - [matrix_set_operations.py](matrix_set_operations.py) 17 | - [state_space_operations.py](state_space_operations.py) 18 | -------------------------------------------------------------------------------- /transitionMatrix/estimators/kaplan_meier_estimator.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 
9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import print_function 16 | 17 | from transitionMatrix.estimators import DurationEstimator 18 | 19 | 20 | class KaplanMeierEstimator(DurationEstimator): 21 | pass 22 | -------------------------------------------------------------------------------- /docs/source/transitionMatrix.creditratings.rst: -------------------------------------------------------------------------------- 1 | Credit Ratings SubPackage 2 | ============================ 3 | 4 | This subpackage collects credit rating specific functionality 5 | 6 | 7 | transitionMatrix.creditratings.creditcurve module 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 9 | 10 | .. automodule:: transitionMatrix.creditratings.creditcurve 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | transitionMatrix.creditratings.creditsystems module 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. automodule:: transitionMatrix.creditratings.creditsystems 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | transitionMatrix.creditratings.predefined module 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. 
automodule:: transitionMatrix.creditratings.predefined 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/source/data_generators.rst: -------------------------------------------------------------------------------- 1 | Data Generators 2 | =================== 3 | 4 | The transitionMatrix distribution includes a number of data generators to support testing / training objectives. 5 | 6 | * **exponential_transitions**: Generate continuous time events from exponential distribution and uniform sampling from state space. Suitable for testing cohorting algorithms and duration based estimators. 7 | * **markov_chain**: Generate discrete events from a Markov chain matrix in Compact data format. Suitable for testing cohort based estimators 8 | * **long_format**: Generate continuous events from a Markov chain matrix in Long data format. Suitable for testing duration based estimators 9 | * **portfolio_lables**: Generate a collection of credit rating states emulating a snapshot of portfolio data. Suitable for mappings and transformations of credit rating states 10 | 11 | 12 | .. note:: Do not confuse *data generators* with *matrix generators* 13 | 14 | Data Generation Examples 15 | ------------------------- 16 | 17 | All data generation examples are in script examples/python/generate_synthetic_data.py 18 | -------------------------------------------------------------------------------- /docs/source/withdrawn_ratings.rst: -------------------------------------------------------------------------------- 1 | Withdrawn Ratings 2 | ======================== 3 | 4 | Withdrawn ratings are a common issue that needs to be handled in the context of estimating transition matrices. 
See `right censoring issues `_ 5 | 6 | Adjust NR (Not Rated) States 7 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 8 | 9 | Adjusting for NR states can be done via the :meth:`transitionMatrix.model.TransitionMatrix.remove` method. 10 | 11 | 12 | Single Period Matrix 13 | """""""""""""""""""""""""""" 14 | Example of using transitionMatrix to adjust the (not-rated) NR state. Input data are the Standard and Poor's historical data (1981 - 2016) for corporate credit rating migrations. Example of handling withdrawn (NR) ratings. 15 | 16 | * Script: examples/python/adjust_nr_state.py 17 | 18 | 19 | Multi-period Matrix 20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | * Script: examples/python/fix_multiperiod_matrix.py 23 | 24 | Example of using transitionMatrix to detect and solve various pathologies that might be affecting transition matrix data 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """ 17 | run the transitionMatrix test suite 18 | 19 | """ 20 | 21 | import sys 22 | import unittest 23 | 24 | from transitionMatrix import source_path 25 | 26 | sys.path.append(source_path) 27 | 28 | loader = unittest.TestLoader() 29 | start_dir = source_path + 'tests' 30 | suite = loader.discover(start_dir) 31 | 32 | runner = unittest.TextTestRunner() 33 | runner.run(suite) 34 | -------------------------------------------------------------------------------- /docs/source/datasets.rst: -------------------------------------------------------------------------------- 1 | Datasets 2 | =================== 3 | 4 | The transitionMatrix distribution includes a number of datasets to support testing / training objectives. Datasets come in two main types: 5 | 6 | * State Transition Data (used in estimation). There are both dummy (synthetic) examples and some actual data. Transition data are usually in CSV format. 7 | * Transition Matrices and Multi-period Sets of matrices (again both dummy and actual examples). Transition matrices are usually in JSON format. 8 | 9 | State Transition Data 10 | ------------------------------------------- 11 | 12 | The scripts are located in examples/python. For testing purposes all examples can be run using the run_examples.py script located in the root directory. Some scripts have an example flag that selects alternative input data or estimators. 13 | 14 | .. 
csv-table:: List of Transition Datasets 15 | :header-rows: 1 16 | :widths: 15 5 5 5 5 15 50 17 | :file: ../../datasets/dataset_list.csv 18 | 19 | 20 | Transition Matrices 21 | -------------------------------------------- 22 | 23 | * generic_monthly 24 | * generic_multiperiod 25 | * JLT 26 | * sp 2017 27 | 28 | -------------------------------------------------------------------------------- /datasets/JLT.json: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | 0.891, 4 | 0.0963, 5 | 0.0078, 6 | 0.0019, 7 | 0.003, 8 | 0.0, 9 | 0.0, 10 | 0.0 11 | ], 12 | [ 13 | 0.0086, 14 | 0.901, 15 | 0.0747, 16 | 0.0099, 17 | 0.0029, 18 | 0.0029, 19 | 0.0, 20 | 0.0 21 | ], 22 | [ 23 | 0.0009, 24 | 0.0291, 25 | 0.8894, 26 | 0.0649, 27 | 0.0101, 28 | 0.0045, 29 | 0.0, 30 | 0.0009 31 | ], 32 | [ 33 | 0.0006, 34 | 0.0043, 35 | 0.0656, 36 | 0.8427, 37 | 0.0644, 38 | 0.016, 39 | 0.0018, 40 | 0.0045 41 | ], 42 | [ 43 | 0.0004, 44 | 0.0022, 45 | 0.0079, 46 | 0.0719, 47 | 0.7764, 48 | 0.1043, 49 | 0.0127, 50 | 0.0241 51 | ], 52 | [ 53 | 0.0, 54 | 0.0019, 55 | 0.0031, 56 | 0.0066, 57 | 0.0517, 58 | 0.8246, 59 | 0.0435, 60 | 0.0685 61 | ], 62 | [ 63 | 0.0, 64 | 0.0, 65 | 0.0116, 66 | 0.0116, 67 | 0.0203, 68 | 0.0754, 69 | 0.6493, 70 | 0.2319 71 | ], 72 | [ 73 | 0.0, 74 | 0.0, 75 | 0.0, 76 | 0.0, 77 | 0.0, 78 | 0.0, 79 | 0.0, 80 | 1.0 81 | ] 82 | ] -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.10" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # 
Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/source/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | python: 34 | install: 35 | - requirements: requirements-dev.txt 36 | -------------------------------------------------------------------------------- /docs/source/credit_curves.rst: -------------------------------------------------------------------------------- 1 | Credit Curves 2 | ======================== 3 | 4 | A Credit Curve denotes a grouping of credit risk metrics (parameters) that provide estimates that a legal entity experiences a Credit Event over different (an increasing sequence of longer) time periods. `See Credit Curves `_ 5 | 6 | A multi-period matrix and a credit curve are closely related objects (under some circumstances the latter can be thought of as a subset of the former). 
The transitionMatrix package offers the following main functionality concerning credit curves: 7 | 8 | * The :class:`transitionMatrix.creditratings.creditcurve.CreditCurve` class for storing and working with credit curves 9 | * The :meth:`transitionMatrix.model.TransitionMatrixSet.default_curves` transitionMatrixSet method that extracts from a matrix set the default curve 10 | 11 | 12 | Example: Calculate and Plot Credit Curves 13 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 14 | 15 | Example of using transitionMatrix to calculate and visualize multi-period credit curves 16 | 17 | * Script: examples/python/credit_curves.py 18 | 19 | .. image:: ../../examples/credit_curves.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. transitionMatrix documentation master file 2 | 3 | transitionMatrix Documentation 4 | =============================== 5 | 6 | .. image:: ../../examples/overview.png 7 | 8 | transitionMatrix is a pure Python powered library for the statistical analysis and visualization of state transition phenomena. It can be used to analyze any dataset that captures *timestamped transitions in a discrete state space.* 9 | 10 | Use cases include applications in finance (for example credit rating transitions), IT (system state event logs) and more. 11 | 12 | **NB: transitionMatrix is still in alpha release / active development. If you encounter issues please raise them in our github repository** 13 | 14 | .. 
toctree:: 15 | :maxdepth: 2 16 | :caption: Contents: 17 | 18 | description 19 | getting_started 20 | data_formats 21 | datasets 22 | preprocessing 23 | credit_ratings 24 | estimators 25 | postprocessing 26 | data_generators 27 | federation 28 | examples 29 | modules 30 | testing 31 | roadmap 32 | changelog 33 | 34 | 35 | Indexes and tables 36 | ================== 37 | 38 | * :ref:`genindex` 39 | * :ref:`modindex` 40 | * :ref:`search` 41 | -------------------------------------------------------------------------------- /docs/source/transitionMatrix.rst: -------------------------------------------------------------------------------- 1 | transitionMatrix Package 2 | ============================ 3 | 4 | The core module 5 | 6 | .. automodule:: transitionMatrix.model 7 | :noindex: 8 | 9 | transitionMatrix Classes 10 | ------------------------------ 11 | 12 | TransitionMatrix 13 | ~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transitionMatrix.model.TransitionMatrix 16 | :members: 17 | 18 | .. automethod:: __new__ 19 | 20 | TransitionMatrixSet 21 | ~~~~~~~~~~~~~~~~~~~ 22 | 23 | .. autoclass:: transitionMatrix.model.TransitionMatrixSet 24 | :members: 25 | 26 | .. automethod:: __init__ 27 | 28 | .. automethod:: __mul__ 29 | 30 | 31 | EmpiricalTransitionMatrix 32 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. todo:: This is future functionality 35 | 36 | .. autoclass:: transitionMatrix.model.EmpiricalTransitionMatrix 37 | :members: 38 | 39 | .. automethod:: __init__ 40 | 41 | 42 | transitionMatrix Subpackages 43 | ============================= 44 | 45 | .. 
toctree:: 46 | 47 | transitionMatrix.estimators 48 | transitionMatrix.statespaces 49 | transitionMatrix.creditratings 50 | transitionMatrix.generators 51 | transitionMatrix.visualization 52 | transitionMatrix.utils 53 | -------------------------------------------------------------------------------- /docs/source/example_with_jlt.rst: -------------------------------------------------------------------------------- 1 | Working with an actual matrix 2 | ============================== 3 | 4 | The core capability of transitionMatrix is to produce estimated matrices but getting a realistic example requires quite some work. In this section we assume we have estimated one. 5 | 6 | Let's look at a realistic example from the JLT paper 7 | 8 | .. code:: 9 | 10 | # Reproduce JLT Generator 11 | # We load it using different sources 12 | E = tm.TransitionMatrix(values=JLT) 13 | E_2 = tm.TransitionMatrix(json_file=dataset_path + "JLT.json") 14 | E_3 = tm.TransitionMatrix(csv_file=dataset_path + "JLT.csv") 15 | # Let's check there are no errors 16 | Error = E - E_3 17 | print(np.linalg.norm(Error)) 18 | # Let's look at validation and generators 19 | # Empirical matrices will not satisfy constraints exactly 20 | print(E.validate(accuracy=1e-3)) 21 | print(E.characterize()) 22 | print(E.generator()) 23 | Error = E - expm(E.generator()) 24 | # Frobenius norm 25 | print(np.linalg.norm(Error)) 26 | # L1 norm 27 | print(np.linalg.norm(Error, 1)) 28 | # Use pandas style API for saving to files 29 | E.to_csv("JLT.csv") 30 | E.to_json("JLT.json") 31 | 32 | -------------------------------------------------------------------------------- /docs/source/cohort_estimator.rst: -------------------------------------------------------------------------------- 1 | Cohort Estimator 2 | ======================== 3 | 4 | A cohort estimator (more accurately a discrete-time estimator) is a class of estimators of multi-state transitions that is a simpler alternative to Duration type estimators 5 | 6 | 7 | 
Estimate a Transition Matrix from Cohort Data 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | Example workflows using transitionMatrix to estimate a transition matrix from data that are already grouped in cohorts 11 | 12 | * Script: examples/python/matrix_from_cohort_data.py 13 | * Example ID: 3 14 | 15 | 16 | .. code:: 17 | 18 | data = pd.read_csv(dataset_path + 'synthetic_data6.csv', dtype={'State': str}) 19 | sorted_data = data.sort_values(['ID', 'Timestep'], ascending=[True, True]) 20 | myState = tm.StateSpace() 21 | myState.generic(2) 22 | print(myState.validate_dataset(dataset=sorted_data)) 23 | myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05}) 24 | result = myEstimator.fit(sorted_data) 25 | myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental') 26 | 27 | myEstimator.print(select='Counts', period=0) 28 | myEstimator.print(select='Frequencies', period=18) -------------------------------------------------------------------------------- /docs/source/transitionMatrix.estimators.rst: -------------------------------------------------------------------------------- 1 | Estimators SubPackage 2 | ====================================== 3 | 4 | This subpackage implements the various estimators 5 | 6 | transitionMatrix.estimators.simple\_estimator module 7 | ---------------------------------------------------- 8 | 9 | .. automodule:: transitionMatrix.estimators.simple_estimator 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | 15 | transitionMatrix.estimators.cohort\_estimator module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: transitionMatrix.estimators.cohort_estimator 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | transitionMatrix.estimators.aalen\_johansen\_estimator module 25 | ------------------------------------------------------------- 26 | 27 | .. 
automodule:: transitionMatrix.estimators.aalen_johansen_estimator 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | transitionMatrix.estimators.kaplan\_meier\_estimator module 33 | ----------------------------------------------------------- 34 | 35 | .. todo:: This is future functionality 36 | 37 | .. automodule:: transitionMatrix.estimators.kaplan_meier_estimator 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | -------------------------------------------------------------------------------- /datasets/dataset_list.csv: -------------------------------------------------------------------------------- 1 | File,Format,Events,Entities,States,Generator,Description 2 | rating_data_raw.csv,Compact,4000,1829,9,Extract,A typical credit rating dataset 3 | rating_data.csv,Compact,3780,1642,9,Data cleaning script,A typical credit rating dataset 4 | scenario_data.csv,Compact,550,50,5,, 5 | synthetic_data.csv,Compact,100,10,2,, 6 | synthetic_data1.csv,Compact,100,1,4,Generator(=1),DURATION TYPE DATASETS (Compact format) 7 | synthetic_data2.csv,Compact,10000,1000,2,Generator(=2),DURATION TYPE DATASETS (Compact format) 8 | synthetic_data3.csv,Compact,2000,100,7,Generator(=3),DURATION TYPE DATASETS (Compact format) 9 | synthetic_data4.csv,Compact,10000,1000,8,Generator(=4),Cohort type dataset (Generic Rating Matrix). 
Offers a semi-realistic example 10 | synthetic_data5.csv,Compact,50000,10000,3,Generator(=5),Large cohort type dataset useful for testing convergence 11 | synthetic_data6.csv,Compact,20000,1000,2,Generator(=6),COHORT TYPE DATASETS 12 | synthetic_data7.csv,Canonical,1295,1000,8,Generator(=7),Duration type datasets in Long Format 13 | synthetic_data8.csv,Canonical,10000,10000,2,Generator(=8),Duration type datasets in Long Format 14 | synthetic_data9.csv,Canonical,1338,1000,8,Generator(=9),Duration type datasets in Long Format 15 | synthetic_data10.csv,Canonical,12000,2000,9,Generator(=10),Credit Rating Migrations in Long Format / Compact Form 16 | test.csv,Compact,14,7,3,, 17 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
class TestDatasets(unittest.TestCase):
    """
    Smoke tests for loading matrices bundled with the package
    """

    def test_minimal_matrix(self):
        # Build a matrix from the predefined Minimal values and check its dimension
        matrix = tm.TransitionMatrix(values=Minimal)
        matrix.validate()
        expected_dimension = 3
        self.assertEqual(matrix.dimension, expected_dimension)

    def test_matrix_set_load_csv(self):
        # Load a cumulative matrix set from CSV and check the period list it exposes
        csv_path = dataset_path + "sp_1981-2016.csv"
        matrix_set = tm.TransitionMatrixSet(csv_file=csv_path, temporal_type='Cumulative')
        matrix_set.validate()
        expected_periods = [1, 2, 3, 5, 7, 10, 15, 20]
        self.assertEqual(matrix_set.periods, expected_periods)
10 | 11 | The TransitionMatrixSet class allows converting between the two representations 12 | 13 | 14 | Matrix *Set* Operations 15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 16 | 17 | * Script: matrix_set_operations.py 18 | 19 | Contains examples using transitionMatrix to perform various transition matrix **set** operations (Multi-period measurement context) 20 | 21 | 22 | Default Curves 23 | -------------- 24 | 25 | Absorbing states (in credit risk context a borrower default) are particularly important, therefore some specific functionality is provided to isolate the corresponding default rate *curve*. (See also the CreditCurve object) 26 | 27 | -------------------------------------------------------------------------------- /docs/source/examples.rst: -------------------------------------------------------------------------------- 1 | Usage Examples 2 | ====================== 3 | 4 | The examples directory includes both **standalone python scripts** and **jupyter notebooks** to help you get started. (NB: Currently there are more scripts than notebooks). 5 | 6 | A selection of topics covered: 7 | 8 | - Generating transition matrices from data (using various estimators) 9 | - Manipulating transition matrices 10 | - Computing and visualizing credit curves corresponding to a set of transition matrices 11 | - Mapping rating states between different rating systems 12 | 13 | Python Scripts 14 | ------------------------------------------- 15 | 16 | The scripts are located in examples/python. For testing purposes all examples can be run using the run_examples.py script located in the root directory. Some scripts have an example flag that selects alternative input data or estimators. 17 | 18 | .. 
csv-table:: List of Example Scripts 19 | :header-rows: 1 20 | :widths: 20 5 20 55 21 | :file: ../../examples/python/example_list.csv 22 | 23 | 24 | Jupyter Notebooks 25 | ------------------------------------------- 26 | 27 | * Adjust_NotRated_State.ipynb 28 | * Matrix_Operations.ipynb 29 | * Monthly_from_Annual.ipynb 30 | 31 | Open Risk Academy Scripts 32 | ------------------------------------------- 33 | 34 | Additional examples are available in the Open Risk Academy course `Analysis of Credit Migration using Python TransitionMatrix `_. The scripts developed in the course are `available here `_ 35 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
class TestPreprocessing(unittest.TestCase):
    """Tests for the preprocessing helpers exposed under tm.utils"""

    def test_bin_timestamps(self):
        """ Binning events into cohorts must preserve the total event count"""

        csv_file = source_path + "datasets/" + 'synthetic_data1.csv'
        raw_events = pd.read_csv(csv_file)
        # Number of non-null IDs in the raw input, i.e. the raw event count
        expected_total = raw_events['ID'].count()
        binned, _intervals = tm.utils.bin_timestamps(raw_events, cohorts=5, remove_stale=False)
        # Counts are cast to int before summing so the comparison is integer-to-integer
        binned['Count'] = binned['Count'].astype(int)
        binned_total = binned['Count'].sum()
        self.assertEqual(expected_total, binned_total)


class TestDataSetGenerators(unittest.TestCase):
    """Placeholder test case: no checks are implemented here"""
    pass
def print_matrix(A, format_type='Standard', accuracy=2):
    """ Pretty print a matrix

    Each row is printed on its own line, every entry followed by a single
    space, and a blank line is printed after the last row.

    :param A: two-dimensional array-like exposing ``shape`` and ``A[i, j]`` indexing
    :param format_type: formatting options (Standard, Percent). Any other value
        prints empty rows (no entries are emitted)
    :type format_type: str
    :param accuracy: number of decimals to display (applies to both format types)
    :type accuracy: int

    """
    # Build the format strings once instead of once per matrix entry
    decimals = "{0:." + str(accuracy) + "f}"
    standard_format = decimals + ' '
    # Percent output honours `accuracy` as well (the default of 2 keeps the previous output)
    percent_format = decimals + '% '

    for s_in in range(A.shape[0]):
        for s_out in range(A.shape[1]):
            if format_type == 'Standard':
                print(standard_format.format(A[s_in, s_out]), end='')
            elif format_type == 'Percent':
                print(percent_format.format(100 * A[s_in, s_out]), end='')
        print('')
    print('')
""" Characterize datasets (Summary statistics etc)

Reads every CSV named in ``dataset_list`` from the datasets directory and
pretty-prints the result of ``transitions_summary`` for it.

"""

import pprint as pp

import pandas as pd

from transitionMatrix import source_path
from transitionMatrix.utils import transitions_summary

# Datasets directory derived from the package's source_path
dataset_path = source_path + "datasets/"

dataset_list = [
    'rating_data_raw.csv',
    'rating_data.csv',
    'scenario_data.csv',
    'synthetic_data.csv',
    'synthetic_data1.csv',
    'synthetic_data2.csv',
    'synthetic_data3.csv',
    'synthetic_data4.csv',
    'synthetic_data5.csv',
    'synthetic_data6.csv',
    'synthetic_data7.csv',
    'synthetic_data8.csv',
    'synthetic_data9.csv',
    'test.csv'
]

for dataset in dataset_list:
    # Resolve against dataset_path (not a cwd-relative '../../datasets/') so the
    # script also works when launched from outside examples/python
    input_data = pd.read_csv(dataset_path + dataset)
    print(dataset)
    pp.pprint(transitions_summary(input_data))
    print(80 * '-')


def main():
    """Report completion; the summaries above are produced at module level."""
    print("Done")


if __name__ == "__main__":
    main()
image:: ../../examples/single_entity.png 21 | 22 | Example 2 23 | """""""""""""""""""""""""""" 24 | Plotting the state space trajectory of multiple entities 25 | 26 | .. image:: ../../examples/sampled_histories.png 27 | 28 | Example 3 29 | """""""""""""""""""""""""""" 30 | Histogram plot of transition frequencies 31 | 32 | .. image:: ../../examples/estimation.png 33 | 34 | Example 4 35 | """""""""""""""""""""""""""" 36 | Colored scatterplot of entity transitions over time 37 | 38 | .. image:: ../../examples/scatterplot.png 39 | 40 | Example 5 41 | """""""""""""""""""""""""""" 42 | Colored scatterplot of entity transitions over time (alternative form) 43 | 44 | .. image:: ../../examples/scatterplot2.png 45 | 46 | Example 6 47 | """""""""""""""""""""""""""" 48 | Visualize a transition matrix using Hinton-style visual 49 | 50 | .. image:: ../../examples/TransitionMatrix.png 51 | 52 | Example 7 53 | """""""""""""""""""""""""""" 54 | Visualize a transition matrix using a sankey visual (a logarithmic adaptation that is useful for qualitative insight) 55 | 56 | .. image:: ../../examples/sankey.png 57 | -------------------------------------------------------------------------------- /run_examples.py: -------------------------------------------------------------------------------- 1 | # (c) 2017-2024 Open Risk, all rights reserved 2 | # 3 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 4 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 5 | # third-party software included in this distribution. You may not use this file except in 6 | # compliance with the License. 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under 9 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 10 | # either express or implied. 
""" Run all examples for a high-level test that everything is working with the current version of the library

Each example is executed in its own namespace; a failing example is reported
(with its traceback) and the run continues with the next one.

"""

import os
import traceback

examples_path = os.path.join("examples", "python")
filelist = ['adjust_nr_state', 'credit_curves',
            'empirical_transition_matrix', 'fix_multiperiod_matrix', 'generate_synthetic_data',
            'generate_visuals', 'matrix_from_cohort_data', 'matrix_operations', 'matrix_set_operations']

# TODO additional examples
# 'matrix_from_duration_data', 'matrix_lendingclub', 'matrix_set_lendingclub',

if __name__ == '__main__':

    for example in filelist:
        print('\nExecuting example file: ', example.upper())
        print('-----------------------' + '-' * len(example))
        path = os.path.join(examples_path, example + ".py")
        try:
            # Close the file deterministically; the example sources declare utf-8
            with open(path, encoding="utf-8") as source_file:
                source = source_file.read()
            # Fresh namespace per example: an example can no longer overwrite the
            # runner's own variables (e.g. `example`, `filelist`) or leak names
            # into the next example. __name__ stays '__main__' so main guards run.
            namespace = {'__name__': '__main__', '__file__': path}
            # Compiling with the real path gives tracebacks a usable file name
            exec(compile(source, path, 'exec'), namespace)
        except (Exception, SystemExit):
            # Best effort: keep going after a failing (or exiting) example, but let
            # KeyboardInterrupt through so the run can be aborted by the user
            print('**********************' + '*' * len(example))
            print('ERROR in example file', example)
            print(traceback.format_exc())
            print('**********************' + '*' * len(example))
To make those tests as revealing as possible transitionMatrix implements a number of standardized *round-trip tests*: 10 | 11 | * starting with a matrix 12 | * generating compatible data 13 | * estimate a matrix from the data 14 | * comparing the values of input and estimated matrices 15 | 16 | Running all the examples 17 | ------------------------ 18 | Running all the examples is a quick way to check that everything is installed properly, all paths are defined etc. At the root of the distribution: 19 | 20 | .. code:: bash 21 | 22 | python3 run_examples.py 23 | 24 | 25 | The file simply iterates and executes a standalone list of :ref:`Usage Examples`. 26 | 27 | .. code:: python 28 | 29 | filelist = ['adjust_nr_state', 'credit_curves', 'empirical_transition_matrix', 'fix_multiperiod_matrix', 'generate_synthetic_data', 'generate_visuals', 'matrix_from_cohort_data', 'matrix_from_duration_data', 'matrix_lendingclub', 'matrix_set_lendingclub', 'matrix_operations', 'matrix_set_operations'] 30 | 31 | .. warning:: The script might generate a number of files / images at random places within the distribution 32 | 33 | 34 | Test Suite 35 | ------------- 36 | The testing framework is based on unittest. Before you get started and depending on how you obtained / installed the library check: 37 | 38 | - If required adjust the source directory path in transitionMatrix/__init__ 39 | - Unzip the data files in the datasets directory 40 | 41 | Then run all tests 42 | 43 | .. code:: bash 44 | 45 | python3 test.py 46 | 47 | For an individual test: 48 | 49 | .. 
code:: bash 50 | 51 | pytest tests/test_TESTNAME.py 52 | 53 | 54 | -------------------------------------------------------------------------------- /examples/python/deterministic_paths.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""
Create deterministic transitions

Generates a dataset from a fixed list of scripted sequences, bins it into
cohorts, fits a cohort estimator and prints the resulting counts and the
average matrix.

"""

import pandas as pd

import transitionMatrix as tm
from transitionMatrix.estimators import cohort_estimator as es
from transitionMatrix.generators import dataset_generators
from transitionMatrix.utils.converters import datetime_to_float, to_compact

# Three scripted sequences of (float, int) pairs
# NOTE(review): presumably (time, state index) pairs for one entity each - confirm
# against dataset_generators.deterministic
sequences = [[(0.0, 0), (0.5, 1), (1.0, 2)],
             [(0.0, 1), (0.3, 0), (0.8, 1)],
             [(0.0, 2), (0.2, 1), (0.7, 2)]]

# Passed to the generator together with the sequences
# NOTE(review): presumably the number of copies made of each sequence - confirm
replication_count = 10

# State space with three states: '0' -> "A", '1' -> "B", '2' -> "C"
definition = [('0', "A"), ('1', "B"), ('2', "C")]
myState = tm.StateSpace(definition)

input_data = dataset_generators.deterministic(sequences, replication_count)
print(input_data)
# Order events by entity ID, then by time, before binning
sorted_data = input_data.sort_values(['ID', 'Time'], ascending=[True, True])
# Bin the events into 100 cohorts; the returned bounds are handed to the estimator below
cohort_data, cohort_bounds = tm.utils.bin_timestamps(sorted_data, cohorts=100)
print(80*'=')
print(cohort_data)
# Cohort estimator configured with the 'goodman' confidence interval method at alpha = 0.05
myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds, ci={'method': 'goodman', 'alpha': 0.05})
# The labels map the estimator's expected column names onto the dataset's columns (identical here);
# the return value is not used further, the average matrix is read from the estimator instead
result = myEstimator.fit(cohort_data, labels={'Time': 'Time', 'State': 'State', 'ID': 'ID'})
myMatrix = tm.TransitionMatrix(myEstimator.average_matrix)
myEstimator.print(select='Counts')
myMatrix.print_matrix(accuracy=3)
class TestAalenJohansenEstimator(unittest.TestCase):
    """
    Estimate a simple 2x2 transition matrix with an absorbing state

    .. note: The estimate carries sampling error! The required accuracy must be compatible with the input data size

    """

    def test_aalenjohansen_simple_transitions(self):
        events = pd.read_csv(source_path + "datasets/" + 'synthetic_data8.csv')
        ordered_events = events.sort_values(['Time', 'ID'], ascending=[True, True])
        state_space = tm.StateSpace([('0', "G"), ('1', "B")])
        estimator = aj.AalenJohansenEstimator(states=state_space)
        column_labels = {'Time': 'Time', 'From': 'From', 'To': 'To', 'ID': 'ID'}
        estimate, _times = estimator.fit(ordered_events, labels=column_labels)
        # Row 0, last entry along the third axis: approximate comparison only
        for target in (0, 1):
            self.assertAlmostEqual(estimate[0, target, -1], 0.5, places=ACCURATE_DIGITS)
        # Row 1 is compared exactly
        self.assertEqual(estimate[1, 0, -1], 0.0)
        self.assertEqual(estimate[1, 1, -1], 1.0)
licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | import logging 17 | import sys 18 | 19 | import transitionMatrix as tm 20 | from transitionMatrix import dataset_path 21 | from transitionMatrix.model import TransitionMatrix 22 | from transitionMatrix.creditratings.predefined import SP02, SP02NR 23 | 24 | # ACCURATE_DIGITS = 7 25 | ACCURATE_DIGITS = 2 26 | 27 | 28 | class TestNRTransform(unittest.TestCase): 29 | """ 30 | 1. Load in-memory matrices 31 | 2. Perform NR transformation 32 | 3. Test with S&P Result 33 | 34 | .. 
todo:: SnP result rounding seems large 35 | 36 | """ 37 | 38 | def test_nr_matrix_load(self): 39 | a = TransitionMatrix(values=SP02NR) 40 | # messages = a.validate() 41 | # log = logging.getLogger("Test.testNR") 42 | # log.debug("messages= %r", messages) 43 | # self.assertTrue(messages) 44 | self.assertEqual(a.shape[0], a.shape[1]) 45 | self.assertEqual(a.dimension, 9) 46 | 47 | def test_nr_remove(self): 48 | a = TransitionMatrix(values=SP02NR) 49 | b = TransitionMatrix(values=SP02) 50 | a = 0.01 * a 51 | b = 0.01 * b 52 | a = a.remove(8, method='noninform') 53 | for i in range(a.dimension): 54 | for j in range(a.dimension): 55 | self.assertAlmostEqual(a[i, j], b[i, j], places=ACCURATE_DIGITS) 56 | 57 | 58 | if __name__ == "__main__": 59 | logging.basicConfig(stream=sys.stderr) 60 | logging.getLogger("Test.testNR").setLevel(logging.DEBUG) 61 | unittest.main() 62 | -------------------------------------------------------------------------------- /examples/python/state_space_operations.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """ 17 | Examples using transitionMatrix to perform various state space operations 18 | 19 | """ 20 | 21 | from transitionMatrix.creditratings.creditsystems import SnP_Fitch2Moodys, Moodys2DBRS, SnP_SS 22 | from transitionMatrix.generators import dataset_generators as dg 23 | 24 | print("Some Basics") 25 | print(80 * "=") 26 | # Let us load a credit rating scale 27 | myState = SnP_SS 28 | # Print the states 29 | print("The States of our starting scale: ", myState.get_states()) 30 | # Print the state labels 31 | print("The State Labels: ", myState.get_state_labels()) 32 | # Print the complete definition 33 | print("The Full Description: ", myState.definition) 34 | 35 | # Convert SnP ratings to Moody's and DBRS 36 | # Escape R (regulatory default) and SD (selective default) 37 | print("") 38 | print("Convert labels to other rating scales scales") 39 | print(80 * "=") 40 | for state in myState.get_state_labels(): 41 | if state not in ['R', 'SD/D']: 42 | print(state, ' ----> (', SnP_Fitch2Moodys[state], Moodys2DBRS[SnP_Fitch2Moodys[state]], ')') 43 | 44 | print("") 45 | print("Convert data to other scales") 46 | print(80 * "=") 47 | print("Input S&P Labels: ") 48 | # Generate some portfolio data and map to CQS 49 | portfolio = dg.portfolio_labels(myState, 100) 50 | print(portfolio) 51 | print("") 52 | print("Output CQS Labels: ") 53 | mapped_portfolio = [] 54 | for label in portfolio: 55 | mapped_portfolio.append(myState.cqs_map(label)) 56 | print(mapped_portfolio) 57 | 58 | 59 | def main(): 60 | print("Done") 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /examples/python/compare_estimators.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the 
source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | """ 17 | Example workflows using transitionMatrix to estimate a matrix from duration type data 18 | Cohort type dataset (Generic Rating Matrix). Offers a semi-realistic example 19 | 20 | """ 21 | 22 | import pandas as pd 23 | 24 | import transitionMatrix as tm 25 | from transitionMatrix import source_path 26 | from transitionMatrix.estimators.aalen_johansen_estimator import AalenJohansenEstimator 27 | from transitionMatrix.estimators.cohort_estimator import CohortEstimator 28 | from transitionMatrix.statespaces.statespace import StateSpace 29 | from transitionMatrix.utils.converters import to_canonical 30 | from transitionMatrix.utils.preprocessing import unique_timestamps 31 | 32 | dataset_path = source_path + "datasets/" 33 | data = pd.read_csv(dataset_path + 'synthetic_data4.csv', dtype={'State': str}) 34 | myState = StateSpace(transition_data=data) 35 | cohort_bounds = unique_timestamps(data) 36 | 37 | # Estimate matrices using the Cohort estimator 38 | myEstimator = CohortEstimator(states=myState, cohort_bounds=cohort_bounds, ci={'method': 'goodman', 'alpha': 0.05}) 39 | result = myEstimator.fit(data) 40 | myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental') 41 | myMatrixSet.cumulate() 42 | myMatrixSet.print_matrix(period=8) 43 | 44 | # Estimate matrices using the Aalen-Johansen estimator 45 | canonical_data = to_canonical(data) 46 | myEstimator2 = 
AalenJohansenEstimator(states=myState) 47 | etm, times = myEstimator2.fit(canonical_data) 48 | myMatrix2 = tm.TransitionMatrix(etm[:, :, -1]) 49 | print('Cumulative Empirical Matrix') 50 | myMatrix2.print_matrix() 51 | 52 | 53 | def main(): 54 | print("Done") 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /examples/python/adjust_nr_state.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com) 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | """ Examples of using transitionMatrix to adjust the NR (not-rated) statistics. 
17 | 18 | Input data are the Standard and Poor's historical data (1981 - 2016) for corporate credit rating migrations 19 | 20 | """ 21 | 22 | import transitionMatrix as tm 23 | from transitionMatrix import source_path 24 | from transitionMatrix.creditratings.predefined import SP02, SP02NR 25 | from transitionMatrix.utils import print_matrix 26 | 27 | dataset_path = source_path + "datasets/" 28 | 29 | example = 1 30 | 31 | if example == 1: 32 | a = tm.TransitionMatrix(values=SP02NR) 33 | b = tm.TransitionMatrix(values=SP02) 34 | a = 0.01 * a 35 | b = 0.01 * b 36 | a = a.remove(8, method='noninform') 37 | print_matrix(a, format_type='Standard', accuracy=5) 38 | print_matrix(b, format_type='Standard', accuracy=5) 39 | 40 | 41 | elif example == 2: 42 | 43 | print("> Load multi-period transitional matrices (cumulative mode) from json file") 44 | SnP_Set0 = tm.TransitionMatrixSet(json_file=dataset_path + "sp_1981-2016.json", temporal_type='Cumulative') 45 | print("> Valid Input Matrix? ", SnP_Set0.validate()) 46 | 47 | print("> Remove NR transitions and redistribute to other states") 48 | SnP_Set1 = SnP_Set0.remove(8, "noninform") 49 | print("> Valid Output Matrix? ", SnP_Set1.validate()) 50 | 51 | # 52 | # Hurrah, we have an NR adjusted matrix set. 
Lets save it 53 | # 54 | SnP_Set1.to_json(dataset_path + 'sp_NR_adjusted.json', accuracy=5) 55 | 56 | # Compare before / after 57 | SnP_Set0.print_matrix(period=2) 58 | SnP_Set1.print_matrix(period=2) 59 | 60 | 61 | def main(): 62 | print("Done") 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /examples/python/matrix_set_operations.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """ Examples using transitionMatrix to perform operations with transition matrix sets sequences 17 | 18 | """ 19 | 20 | import transitionMatrix as tm 21 | from transitionMatrix.creditratings.predefined import Generic as T1 22 | 23 | print("-- Lets seed the set with a 3x3 matrix") 24 | A = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]]) 25 | print(A) 26 | 27 | print("-- Identical future period transitions in incremental mode") 28 | A_Set = tm.TransitionMatrixSet(values=A, periods=3, method='Copy', temporal_type='Incremental') 29 | print(A_Set.entries) 30 | 31 | print("-- Identical future period transitions in cumulative mode using the power method") 32 | B_Set = tm.TransitionMatrixSet(values=A, periods=3, method='Power', temporal_type='Cumulative') 33 | print(B_Set.entries) 34 | 35 | print("-- Lets instantiate the set directly using a list of matrices") 36 | C_Vals = [[[0.75, 0.25], [0.0, 1.0]], [[0.75, 0.25], [0.0, 1.0]]] 37 | C_Set = tm.TransitionMatrixSet(values=C_Vals, temporal_type='Incremental') 38 | print(C_Set.entries) 39 | 40 | print("-- Validate the constructed sets") 41 | A_Set.validate() 42 | B_Set.validate() 43 | C_Set.validate() 44 | 45 | print("-- Convert to Cumulative") 46 | A_Set.cumulate() 47 | print(A_Set.entries) 48 | A_Set.validate() 49 | 50 | print("-- Convert back to Incremental") 51 | A_Set.incremental() 52 | print(A_Set.entries) 53 | A_Set.validate() 54 | 55 | print("-- Create a multiperiod matrix set and save to json file") 56 | T_Set = tm.TransitionMatrixSet(values=T1, periods=10, method='Power', temporal_type='Cumulative') 57 | T_Set.to_json('Tn.json') 58 | 59 | 60 | def main(): 61 | print("Done") 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /examples/python/matrix_set_lendingclub.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 
2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | """ 17 | Example workflow using transitionMatrix to estimate a set of matrix from LendingClub data 18 | Input data are in a special cohort format as the published datasets have some limitations 19 | 20 | """ 21 | 22 | import pandas as pd 23 | 24 | import transitionMatrix as tm 25 | from transitionMatrix import source_path 26 | from transitionMatrix.estimators import simple_estimator as es 27 | 28 | dataset_path = source_path + "datasets/" 29 | 30 | # Example: LendingClub Style Migration Matrix Set 31 | # Load historical data into pandas frame 32 | # Format: 33 | # Expected Data Format is (ID, State_IN, State_OUT) 34 | 35 | definition = [('A', "Grade A"), ('B', "Grade B"), ('C', "Grade C"), 36 | ('D', "Grade D"), ('E', "Grade E"), ('F', "Grade F"), 37 | ('G', "Grade G"), ('H', "Delinquent"), ('I', "Charged Off"), 38 | ('J', "Repaid")] 39 | myState = tm.StateSpace(definition) 40 | 41 | # Load the data sets into a pandas frame in sequence 42 | # Check matrix_lendingclub.py for comments 43 | 44 | matrix_set = [] 45 | for letter in ['a', 'b', 'c', 'd']: 46 | # store the derived one-period matrices in a list 47 | data = pd.read_csv(dataset_path + 'LoanStats3' + letter + '_Step2.csv') 48 | myEstimator = es.SimpleEstimator(states=myState, ci={'method': 'goodman', 
'alpha': 0.05}) 49 | result = myEstimator.fit(data) 50 | myEstimator.summary() 51 | myMatrix = tm.TransitionMatrix(result) 52 | myMatrix[7, 9] = 1.0 53 | myMatrix[8, 9] = 1.0 54 | myMatrix[9, 9] = 1.0 55 | matrix_set.append(myMatrix) 56 | 57 | # collect all matrices in a matrix set 58 | LC_Set = tm.TransitionMatrixSet(values=matrix_set, temporal_type='Incremental') 59 | LC_Set.print_matrix() 60 | -------------------------------------------------------------------------------- /docs/source/predefined_rating_scales.rst: -------------------------------------------------------------------------------- 1 | Predefined Rating Scales 2 | ======================== 3 | 4 | The transitionMatrix package supports a variety of credit rating scales. They are grouped together in :mod:`transitionMatrix.creditratings.creditsystems`. 5 | 6 | The key ones are described here in more detail. 7 | 8 | 9 | Rating Scales currently covered 10 | -------------------------------- 11 | 12 | The focus of the current selection is on *long-term issuer* rating scales (others will be added): 13 | 14 | - AM Best Europe-Rating Services Ltd. 15 | - ARC Ratings S.A. 16 | - Cerved Rating Agency S.p.A. 17 | - Creditreform Rating AG 18 | - DBRS Ratings Limited 19 | - Fitch Ratings 20 | - Moody’s Investors Service 21 | - Scope Ratings AG 22 | - Standard & Poor’s Ratings Services 23 | 24 | Data per Scale 25 | ------------------------------------------- 26 | 27 | Each rating scale is a StateSpace (see :ref:`State Spaces`) and thus inherits the attributes and methods of that object, namely: 28 | 29 | - The entity defining the scale (the originating entity) 30 | - The full name of the scale (as most originators of rating scales offer multiple scales with different meanings and/or uses) 31 | - The definition of the scale (as a list of tuples in the form [('0', 'X1'), ... 
, ('N-1', 'XN')] where X are the symbols used to denote the credit state) 32 | - The CQS (credit quality step) mapping of the scale as defined by regulatory authorities (see next section) 33 | 34 | 35 | CQS Mappings 36 | ------------ 37 | 38 | The Credit Quality Step (CQS) denotes a standardised indicator of Credit Risk that is recognized in the European Union 39 | 40 | * The CQS Credit Rating Scale is based on numbers, ranging from 1 to 6. 41 | * 1 is the highest quality, 6 is the lowest quality 42 | 43 | The European Supervisory Authorities maintain mappings between credit rating agencies and CQS 44 | 45 | 46 | .. note:: Consult the original documents for definitive mappings, available at the `EBA Website <https://www.eba.europa.eu>`_ 47 | 48 | The Rating Agency State Spaces and mappings are obtained from the latest (20 May 2019) Regulatory Reference: 49 | 50 | :: 51 | 52 | JC 2018 11, FINAL REPORT: REVISED DRAFT ITS ON THE MAPPING OF ECAIS’ CREDIT ASSESSMENTS UNDER CRR 53 | 54 | Example of Label Conversion 55 | """""""""""""""""""""""""""" 56 | Convert labels between credit rating scales 57 | 58 | .. image:: ../../examples/scale_conversions.png 59 | 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | /docs/build/html/ 3 | .env 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /examples/python/fix_multiperiod_matrix.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """ Example of using transitionMatrix to detect and solve various pathologies that might be affecting transition matrix data 17 | 18 | """ 19 | 20 | import numpy as np 21 | 22 | import transitionMatrix as tm 23 | from transitionMatrix import dataset_path 24 | 25 | print("> Loading historical multi-period transitional matrices (cumulative mode) from csv file") 26 | SnP_Set0 = tm.TransitionMatrixSet(csv_file=dataset_path + "sp_1981-2016.csv", temporal_type='Cumulative') 27 | print("> Validate") 28 | print(SnP_Set0.validate()) 29 | print( 30 | "> We detect dimensionality problems. The matrices are not square (missing the trivial Default and NR transitions)") 31 | print("> We must fix that to proceed. Augment the matrices in the set by fixing Default and NR transitions") 32 | C_Vals = [] 33 | for matrix in SnP_Set0.entries: 34 | C = tm.TransitionMatrix(values=np.resize(matrix, (9, 9))) 35 | # np.resize fills any extra entries of the 9x9 result with repeated copies of the input, so the added rows must be reset: zero rows 7 (D) and 8 (NR), i.e. no migration from D or NR to any other state 36 | C[7, 0:9] = 0.0 37 | C[8, 0:9] = 0.0 38 | # D stays in D with certainty; entries are still percentages at this point, hence 100.0 (the set is rescaled by 0.01 further down) 39 | C[7, 7] = 100.0 40 | # NR likewise stays in NR with certainty (100.0 percent) 41 | C[8, 8] = 100.0 42 | C_Vals.append(C) 43 | SnP_Set1 = tm.TransitionMatrixSet(values=C_Vals) 44 | print("> Validate Again") 45 | print(SnP_Set1.validate()) 46 | 47 | print("> Now we have square matrices but the format is not in probabilities!") 48 | print("> Divide all entries by 100") 49 | 50 | SnP_Set2 = SnP_Set1 * 0.01 51 | # SnP_Set2.print() 52 | print("> Validate Again") 53 | print(SnP_Set2.validate()) 54 | 55 | print("> Hurrah, we have a probability matrix set. 
Lets save it") 56 | 57 | SnP_Set2.to_json(dataset_path + 'sp_1981-2016.json', accuracy=5) 58 | 59 | 60 | def main(): 61 | print("Done") 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk (https://www.openriskmanagement.com) 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from codecs import open 17 | 18 | from setuptools import setup 19 | 20 | __version__ = '0.5.1' 21 | 22 | ver = __version__ 23 | 24 | long_descr = open('docs/source/description.rst', 'r', encoding='utf8').read() 25 | 26 | setup(name='transitionMatrix', 27 | version=ver, 28 | description='A Python powered library for statistical analysis and visualization of state transition phenomena', 29 | long_description=long_descr, 30 | long_description_content_type='text/x-rst', 31 | author='Open Risk', 32 | author_email='info@openriskmanagement.com', 33 | packages=['transitionMatrix', 'transitionMatrix.estimators', 'transitionMatrix.creditratings', 34 | 'transitionMatrix.estimators', 'transitionMatrix.generators', 'transitionMatrix.statespaces', 35 | 'transitionMatrix.utils', 'datasets', 'examples.python'], 36 | include_package_data=True, 37 | url='https://github.com/open-risk/transitionMatrix', 38 | install_requires=[ 39 | 'pandas', 40 | 'numpy', 41 | 'scipy', 42 | 'statsmodels', 43 | 'sympy', 44 | 'matplotlib' 45 | ], 46 | zip_safe=False, 47 | provides=['transitionMatrix'], 48 | classifiers=[ 49 | 'Intended Audience :: Developers', 50 | 'Intended Audience :: Science/Research', 51 | 'Intended Audience :: Financial and Insurance Industry', 52 | 'Development Status :: 3 - Alpha', 53 | 'License :: OSI Approved :: Apache Software License', 54 | 'Operating System :: OS Independent', 55 | 'Programming Language :: Python :: 3 :: Only', 56 | 'Programming Language :: Python :: 3.10', 57 | 'Topic :: Scientific/Engineering', 58 | 'Topic :: Scientific/Engineering :: Information Analysis' 59 | ] 60 | 61 | ) 62 | -------------------------------------------------------------------------------- /tests/test_state_space.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | 
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import unittest 17 | 18 | import pandas as pd 19 | from scipy.linalg import expm 20 | 21 | import transitionMatrix as tm 22 | from transitionMatrix import source_path 23 | 24 | ACCURATE_DIGITS = 7 25 | 26 | 27 | class TestStateSpace(unittest.TestCase): 28 | 29 | def test_instantiate_state(self): 30 | definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"), 31 | ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")] 32 | s = tm.StateSpace(definition) 33 | self.assertEqual(s.definition[0][1], 'AAA') 34 | 35 | def test_get_states(self): 36 | definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"), 37 | ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")] 38 | s = tm.StateSpace(definition) 39 | self.assertEqual(s.get_states()[0], '0') 40 | 41 | def test_get_state_labels(self): 42 | definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"), 43 | ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")] 44 | s = tm.StateSpace(definition) 45 | self.assertEqual(s.get_state_labels()[0], 'AAA') 46 | 47 | def test_generic(self): 48 | s = tm.StateSpace() 49 | n = 10 50 | s.generic(n=n) 51 | self.assertEqual(s.get_state_labels()[n-1], str(n-1)) 52 | 53 | def test_validate_dataset(self): 54 | dataset_path = source_path + "datasets/" 55 | data = pd.read_csv(dataset_path + 'test.csv', dtype={'State': int}) 56 | # definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")] 57 | definition = 
[('0', "0"), ('1', "1"), ('2', "2")] 58 | s = tm.StateSpace(definition) 59 | self.assertEqual(s.validate_dataset(dataset=data)[0], "Dataset contains the expected states.") 60 | 61 | 62 | if __name__ == "__main__": 63 | 64 | unittest.main() 65 | 66 | -------------------------------------------------------------------------------- /datasets/synthetic_data1.csv: -------------------------------------------------------------------------------- 1 | ID,Time,State 2 | 0,0.032136548960821035,0 3 | 0,0.0328444887126442,2 4 | 0,0.1501764800072913,3 5 | 0,0.1632084066774684,3 6 | 0,0.2664970065502152,3 7 | 0,0.372697250597263,0 8 | 0,0.5924565019914576,1 9 | 0,0.6955465948842987,1 10 | 0,0.7363854494147481,2 11 | 0,0.8074414166206703,3 12 | 0,0.8963599443933054,3 13 | 0,1.0878377742419945,2 14 | 0,1.1675433779261155,3 15 | 0,1.2672954215676289,3 16 | 0,1.3109912447211465,2 17 | 0,1.3495603879374372,2 18 | 0,1.370858731221019,3 19 | 0,1.5283947083954086,1 20 | 0,1.9527668668150056,2 21 | 0,2.0403493255815226,0 22 | 0,2.061014920526969,0 23 | 0,2.232040388139631,2 24 | 0,2.23350880600617,3 25 | 0,2.475932497508623,1 26 | 0,2.5734462429038354,3 27 | 0,2.8435924407409914,1 28 | 0,3.0283948592185763,3 29 | 0,3.0761863088790395,0 30 | 0,3.1025963817494158,3 31 | 0,3.1104554859797053,3 32 | 0,3.460907110718253,2 33 | 0,3.62096698880944,1 34 | 0,3.9398462388242517,2 35 | 0,4.240435835436774,1 36 | 0,4.400104623276431,1 37 | 0,4.46527255463521,3 38 | 0,4.503623943006057,2 39 | 0,4.55782375521718,1 40 | 0,4.601418565334436,1 41 | 0,4.643198686494953,0 42 | 0,4.704378817300301,0 43 | 0,4.905177094728069,3 44 | 0,4.93505558095269,2 45 | 0,5.0098110911321125,3 46 | 0,5.041839513634524,0 47 | 0,5.127515712888846,0 48 | 0,5.130172859893318,2 49 | 0,5.233723732230302,0 50 | 0,5.23942731915469,3 51 | 0,5.5196674661386735,3 52 | 0,5.570626019796818,1 53 | 0,5.66539210462981,1 54 | 0,5.709583413264629,0 55 | 0,5.719329296729598,3 56 | 0,5.766704198443862,0 57 | 0,5.99396769122474,3 58 | 
0,6.194187788093888,1 59 | 0,6.208220499730838,0 60 | 0,6.349330500400409,1 61 | 0,6.517519409753987,3 62 | 0,6.552156010261818,3 63 | 0,6.65736367212415,0 64 | 0,6.665898712154186,0 65 | 0,6.735692210858016,1 66 | 0,6.819200247067685,1 67 | 0,6.845107006166676,1 68 | 0,7.229096138465353,3 69 | 0,7.308196732991204,0 70 | 0,7.527652532940278,2 71 | 0,7.865265216314766,1 72 | 0,7.997095137774468,3 73 | 0,8.020050939875931,1 74 | 0,8.245600337188524,3 75 | 0,8.458249142732146,2 76 | 0,8.477379865694953,2 77 | 0,8.772627059837468,1 78 | 0,8.842277055249099,0 79 | 0,8.949471730149508,2 80 | 0,8.980930762708697,2 81 | 0,9.0649434165816,1 82 | 0,9.173044255991684,0 83 | 0,9.26382423823099,2 84 | 0,9.386244931606567,2 85 | 0,9.436483889422163,1 86 | 0,9.438836505765465,0 87 | 0,9.568464779377974,1 88 | 0,9.851174740848933,2 89 | 0,10.000169627963329,0 90 | 0,10.033727355266604,1 91 | 0,10.096652658790777,2 92 | 0,10.228395006959211,3 93 | 0,10.319379569557384,1 94 | 0,10.320643943485134,1 95 | 0,10.378435728441596,1 96 | 0,10.402515443650238,1 97 | 0,10.625052026724967,1 98 | 0,11.095139124670885,0 99 | 0,11.111732801649607,0 100 | 0,11.15574978115486,2 101 | 0,11.182183676758042,0 102 | -------------------------------------------------------------------------------- /docs/source/estimators.rst: -------------------------------------------------------------------------------- 1 | Estimation 2 | ======================== 3 | 4 | The estimation of a transition matrix is one of the core functionalities of transitionMatrix. 
Several methods and variations are available in the literature depending on aspects such as: 5 | 6 | * The nature of the observations / data (e.g., whether temporal homogeneity is a valid assumption) 7 | * Whether or not there are competing risk effects 8 | * Whether or not observations have coincident values 9 | * Treating the Right-Censoring of observations (Outcomes beyond the observation window) 10 | * Treating the Left-Truncation of observations (Outcomes prior to the observation window) 11 | 12 | Estimator Types 13 | ---------------- 14 | * **Cohort Based Methods** that group observations in cohorts 15 | * **Duration** (also Hazard Rate or Intensity) Based Methods that utilize the actual duration of each state 16 | 17 | The main estimators currently implemented are as follows: 18 | 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: Implemented Estimators 23 | 24 | simple_estimator 25 | cohort_estimator 26 | aalen-johansen_estimator 27 | 28 | 29 | Whichever estimator is chosen, the outcome of the estimation is an *Empirical Transition Matrix* (or potentially a matrix set) 30 | 31 | Implementation Notes 32 | ^^^^^^^^^^^^^^^^^^^^^^ 33 | 34 | * All estimators derive from the highest level *BaseEstimator* class. 35 | * Duration type estimators derive from the *DurationEstimator* class 36 | 37 | 38 | Estimation Examples 39 | ---------------------- 40 | 41 | The first example of estimating a transition matrix is covered in the :ref:`Getting Started` section. Here we have a few more examples: 42 | 43 | 44 | Estimation Example 1 45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 46 | 47 | Example workflows using transitionMatrix to estimate an empirical transition matrix from duration type data. 
The datasets are produced using examples/generate_synthetic_data.py This example uses the 48 | `Aalen-Johansen estimator `_ 49 | 50 | * Script: examples/python/empirical_transition_matrix.py 51 | 52 | By setting the example variable the script covers a number of variations: 53 | 54 | * Version 1: Credit Rating Migration example 55 | * Version 2: Simple 2x2 Matrix for testing 56 | * Version 3: Credit Rating Migration example with timestamps in raw date format 57 | 58 | 59 | Plot of estimated transition probabilities 60 | 61 | .. image:: ../../examples/transition_probabilities.png 62 | 63 | Estimation Example 2 64 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 65 | 66 | Example workflows using transitionMatrix to estimate a transition matrix from data that are in duration format. The datasets are first grouped in period cohorts 67 | 68 | * Script: examples/python/matrix_from_duration_data.py 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /datasets/synthetic_data.csv: -------------------------------------------------------------------------------- 1 | ID,Time,State 2 | 0,0.06723778542948632,1 3 | 0,0.09597485751071153,1 4 | 0,0.12840651857400373,1 5 | 0,0.08884293081078959,0 6 | 0,0.017236797703826454,1 7 | 0,0.04142719971495457,0 8 | 0,0.060835676140441775,0 9 | 0,0.14740930981852776,0 10 | 0,0.167079439827748,1 11 | 0,0.03147147525189556,0 12 | 1,0.004968788531962705,1 13 | 1,0.053293651962532565,0 14 | 1,0.009651801128417029,1 15 | 1,0.03048332703605733,1 16 | 1,0.06638471310209446,0 17 | 1,0.015776033078408927,1 18 | 1,0.1021635631003995,0 19 | 1,0.014436258946810497,1 20 | 1,0.33924462590246196,1 21 | 1,0.037562012730767716,0 22 | 2,0.03951811328208232,1 23 | 2,0.22774260421673156,0 24 | 2,0.5404083467427436,0 25 | 2,0.010541775279632345,1 26 | 2,0.1244625928538286,1 27 | 2,0.08656719159933775,0 28 | 2,0.11748287413740154,0 29 | 2,0.1996826430920634,1 30 | 2,0.05975975335994709,1 31 | 
2,0.10746790512691595,0 32 | 3,0.03535447307255651,1 33 | 3,0.014017522819624196,1 34 | 3,0.06477811805559093,0 35 | 3,0.45733057346998296,1 36 | 3,0.040897175945409064,1 37 | 3,0.019462973334769346,1 38 | 3,0.058580089535660664,0 39 | 3,0.06769213659128215,1 40 | 3,0.03638618673069476,0 41 | 3,0.24097479304202274,0 42 | 4,0.021460964952152665,0 43 | 4,0.14254178110884028,0 44 | 4,0.10235577729249797,1 45 | 4,0.3566478760015271,1 46 | 4,0.11519119147622381,0 47 | 4,0.05284980067354113,0 48 | 4,0.12539878956421344,1 49 | 4,0.06759543835987696,0 50 | 4,0.43413128766899595,1 51 | 4,0.3302626971706799,1 52 | 5,0.02446531950030919,1 53 | 5,0.02143744522917154,1 54 | 5,0.0535243521033605,1 55 | 5,0.061210309683403956,0 56 | 5,0.15324405096610794,0 57 | 5,0.06922647175316272,0 58 | 5,0.09047743285921706,1 59 | 5,0.25173581506157733,1 60 | 5,0.053053964822302214,1 61 | 5,0.053615546123845594,1 62 | 6,0.07553868934120597,0 63 | 6,0.02186767955671588,0 64 | 6,0.05402675765240426,1 65 | 6,0.12432030442705991,0 66 | 6,0.015009455687755703,1 67 | 6,0.009406884087957135,0 68 | 6,0.06256146033856605,0 69 | 6,0.017157773924665463,1 70 | 6,0.2420313204877975,1 71 | 6,0.08117769174618861,1 72 | 7,0.11538446507115581,0 73 | 7,0.11580125393382101,1 74 | 7,0.15062545305165453,0 75 | 7,0.013037261082576352,0 76 | 7,0.07262346101160896,1 77 | 7,0.0032108575734070967,0 78 | 7,0.09997016226647133,0 79 | 7,0.1991857270316179,1 80 | 7,0.1620064688117187,0 81 | 7,0.05412995405001916,0 82 | 8,0.12048582884180803,1 83 | 8,0.07350556198860529,1 84 | 8,0.021637712152722574,0 85 | 8,0.10813078548139338,0 86 | 8,0.03218653490971687,1 87 | 8,0.10189682146066731,0 88 | 8,0.008380510023153324,1 89 | 8,0.07556215382911058,0 90 | 8,0.08050993047122985,1 91 | 8,0.011675169933270849,1 92 | 9,0.021016620610991025,0 93 | 9,0.010082662513525357,1 94 | 9,0.1120830245455466,1 95 | 9,0.06447942096025573,1 96 | 9,0.004509385352343166,0 97 | 9,0.08766604704313445,0 98 | 9,0.013323721563095981,1 99 | 
9,0.06822361317027965,1 100 | 9,0.028054150357278308,0 101 | 9,0.1419454627504486,1 102 | -------------------------------------------------------------------------------- /datasets/sp_1981-2016.csv: -------------------------------------------------------------------------------- 1 | From States,To States,Periods,Tenor,Tenor,Tenor,Tenor,Tenor,Tenor,Tenor,Tenor 2 | 7,9,8,1,2,3,5,7,10,15,20 3 | 87.05,9.03,0.53,0.05,0.08,0.03,0.05,0,3.17,, 4 | 0.52,86.82,8,0.51,0.05,0.07,0.02,0.02,3.99,, 5 | 0.03,1.77,87.79,5.33,0.32,0.13,0.02,0.06,4.55,, 6 | 0.01,0.1,3.51,85.56,3.79,0.51,0.12,0.18,6.23,, 7 | 0.01,0.03,0.12,4.97,76.98,6.92,0.61,0.72,9.63,, 8 | 0,0.03,0.09,0.19,5.15,74.26,4.46,3.76,12.06,, 9 | 0,0,0.13,0.19,0.63,12.91,43.97,26.78,15.39,, 10 | 75.74,16.08,1.44,0.11,0.19,0.05,0.11,0.03,6.26,, 11 | 0.91,75.47,14.17,1.31,0.19,0.15,0.02,0.06,7.73,, 12 | 0.04,3.19,77.22,9.24,0.81,0.29,0.05,0.15,9.02,, 13 | 0.02,0.19,6.43,73.67,6.01,1.13,0.22,0.52,11.82,, 14 | 0.01,0.05,0.31,8.79,59.41,10.31,1.1,2.25,17.76,, 15 | 0,0.04,0.16,0.46,8.68,55.13,5.11,8.56,21.86,, 16 | 0,0,0.17,0.54,1.08,16.61,22.03,35.53,24.03,, 17 | 65.51,22.03,2.36,0.32,0.19,0.08,0.11,0.13,9.27,, 18 | 1.21,65.83,18.69,2.11,0.36,0.23,0.03,0.13,11.42,, 19 | 0.06,4.15,68.49,11.83,1.34,0.46,0.1,0.26,13.31,, 20 | 0.02,0.29,8.54,64.33,7.08,1.69,0.3,0.91,16.83,, 21 | 0.01,0.06,0.54,11.22,46.65,11.61,1.28,4.07,24.55,, 22 | 0,0.03,0.23,0.84,10.48,41.37,4.66,12.78,29.62,, 23 | 0,0,0.14,0.61,1.65,16.62,10.9,40.68,29.39,, 24 | 49.58,28.37,4.86,0.81,0.24,0.16,0.08,0.35,15.53,, 25 | 1.49,50.29,24.87,3.71,0.59,0.39,0.04,0.34,18.26,, 26 | 0.08,5.22,54.95,15.13,2.15,0.71,0.16,0.57,21.04,, 27 | 0.03,0.47,10.51,51.02,7.68,2.29,0.4,1.93,25.68,, 28 | 0.01,0.08,1.06,12.72,30.83,11.08,1.32,7.84,35.06,, 29 | 0.01,0.03,0.28,1.63,10.55,24.83,2.99,19.25,40.42,, 30 | 0,0,0.12,0.74,2.98,12.18,2.53,46.96,34.49,, 31 | 38.31,31.58,6.99,1.5,0.3,0.19,0.11,0.53,20.49,, 32 | 1.55,39.26,28.08,4.91,0.79,0.4,0.03,0.57,24.42,, 33 | 
0.08,5.46,45.54,16.73,2.71,0.86,0.15,0.98,27.5,, 34 | 0.03,0.61,10.93,42.12,7.4,2.47,0.39,3,33.03,, 35 | 0,0.09,1.43,12.5,21.96,9.75,1.06,11.17,42.04,, 36 | 0.01,0.02,0.38,2.1,8.92,15.71,1.75,24.15,46.96,, 37 | 0,0,0.23,0.97,3.51,7.95,1.48,49.51,36.34,, 38 | 26.01,32.25,9.82,2.87,0.18,0.21,0.06,0.74,27.87,, 39 | 1.32,28.24,29.36,6.82,1.05,0.44,0.03,0.83,31.9,, 40 | 0.11,5.35,35.26,17.61,3,0.99,0.14,1.61,35.92,, 41 | 0.02,0.74,10.85,32.58,6.7,2.46,0.33,4.56,41.77,, 42 | 0.02,0.07,1.79,11.24,14.64,7.75,0.67,15.39,48.43,, 43 | 0,0.04,0.45,2.57,6.87,8.67,0.9,28.71,51.8,, 44 | 0,0,0.18,0.84,3.5,4.53,0.36,50.57,40.01,, 45 | 13.3,29.98,14.94,2.86,0.61,0.44,0.03,0.92,36.9,, 46 | 0.96,16.42,27.4,9.1,1.32,0.66,0.03,1.15,42.95,, 47 | 0.12,3.99,24.47,17.49,3.21,1.21,0.16,2.71,46.64,, 48 | 0,0.77,8.35,23.14,5.49,2.51,0.26,7.65,51.83,, 49 | 0,0.14,1.99,8.31,8.25,5.23,0.44,21.81,53.83,, 50 | 0,0.07,0.51,2.43,3.67,3.89,0.47,36.94,52.02,, 51 | 0,0,0.59,1.07,2.63,1.07,0.2,59.41,35.02,, 52 | 5.72,24.84,18.94,3.59,0.93,0.93,0.04,1.38,43.61,, 53 | 0.68,9.24,22.74,11.76,1.61,0.9,0.06,1.84,51.17,, 54 | 0.11,2.81,18,15.43,3.17,1.51,0.19,3.91,54.87,, 55 | 0,0.71,6.86,18.5,4.01,1.88,0.16,9.66,58.21,, 56 | 0,0.06,1.63,6.85,4.13,3.73,0.46,24.39,58.75,, 57 | 0,0.02,0.44,2.63,2.77,2.1,0.28,36.21,55.54,, 58 | 0,0,0.36,0.72,2.15,0.54,0,56.63,39.61,, 59 | -------------------------------------------------------------------------------- /examples/python/estimate_matrix.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 
9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | An end-to-end example of estimating a credit rating matrix from historical data using two different estimators 17 | 18 | """ 19 | import pprint as pp 20 | 21 | import pandas as pd 22 | from scipy.linalg import expm 23 | 24 | import transitionMatrix as tm 25 | from transitionMatrix.estimators.aalen_johansen_estimator import AalenJohansenEstimator 26 | from transitionMatrix.estimators.cohort_estimator import CohortEstimator 27 | from transitionMatrix.statespaces.statespace import StateSpace 28 | from transitionMatrix.utils import transitions_summary 29 | from transitionMatrix.utils.converters import to_canonical 30 | 31 | # Load the data into a pandas frame 32 | input_data = pd.read_csv('../../datasets/rating_data.csv') 33 | print('> Transitions Summary Input Data') 34 | pp.pprint(transitions_summary(input_data)) 35 | 36 | # Infer and describe state space 37 | myState = StateSpace(transition_data=input_data) 38 | myState.describe() 39 | print('> The order of states is not important for estimation but it is important for presentation!') 40 | 41 | # Convert format to canonical form 42 | canonical_data = to_canonical(input_data) 43 | 44 | # Group the data into temporal cohorts 45 | print(80 * '=') 46 | cohort_data, cohort_intervals = tm.utils.bin_timestamps(input_data, cohorts=5, remove_stale=True) 47 | print('Intervals : ', cohort_intervals) 48 | 49 | print('> Transitions Summary Cohorted Data') 50 | pp.pprint(transitions_summary(cohort_data)) 51 | 52 | myEstimator = CohortEstimator(states=myState, cohort_bounds=cohort_intervals, ci={'method': 'goodman', 'alpha': 0.05}) 53 | 54 | myEstimator.fit(cohort_data) 55 | 56 
| myMatrix = tm.TransitionMatrix(myEstimator.average_matrix, states=myState) 57 | myMatrix.print_matrix(accuracy=3, format_type='Standard', labels=False) 58 | 59 | myEstimator2 = AalenJohansenEstimator(states=myState) 60 | labels = {'Time': 'Time', 'From': 'From', 'To': 'To', 'ID': 'ID'} 61 | etm, times = myEstimator2.fit(canonical_data, labels=labels) 62 | myMatrix2 = tm.TransitionMatrix(etm[:, :, -1]) 63 | G = myMatrix2.generator() 64 | oneyear = tm.TransitionMatrix(expm(0.2 * G)) 65 | oneyear.print_matrix(accuracy=3) 66 | 67 | 68 | def main(): 69 | print("Done") 70 | 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /examples/python/matrix_lendingclub.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """ 17 | Example workflow using transitionMatrix to estimate a matrix from LendingClub data 18 | Input data are in a special cohort format as the published datasets have some limitations 19 | 20 | """ 21 | 22 | import pandas as pd 23 | 24 | import transitionMatrix as tm 25 | from transitionMatrix import source_path 26 | from transitionMatrix.estimators import simple_estimator as es 27 | 28 | dataset_path = source_path + "datasets/" 29 | 30 | # Example: LendingClub Style Migration Matrix 31 | # Load historical data into pandas frame 32 | # Format: 33 | # Expected Data Format is (ID, State_IN, State_OUT) 34 | 35 | # Step 1 36 | # Load the data set into a pandas frame 37 | # Make sure state is read as a string and not as integer 38 | print("Step 1") 39 | data = pd.read_csv(dataset_path + 'LoanStats3a_Step2.csv') 40 | # Data is in pandas frame, all pandas methods are available 41 | print(data.describe()) 42 | 43 | # Step 2 44 | # Describe and validate the State Space against the data 45 | print("Step 2") 46 | definition = [('A', "Grade A"), ('B', "Grade B"), ('C', "Grade C"), 47 | ('D', "Grade D"), ('E', "Grade E"), ('F', "Grade F"), 48 | ('G', "Grade G"), ('H', "Delinquent"), ('I', "Charged Off"), 49 | ('J', "Repaid")] 50 | myState = tm.StateSpace(definition) 51 | myState.describe() 52 | labels = {'State': 'State_IN'} 53 | print(myState.validate_dataset(dataset=data, labels=labels)) 54 | labels = {'State': 'State_OUT'} 55 | print(myState.validate_dataset(dataset=data, labels=labels)) 56 | 57 | # Step 3 58 | # Estimate matrices using Simple Estimator (Frequency count) 59 | # compute confidence interval using goodman method at 95% confidence level 60 | 61 | print("Step 3") 62 | myEstimator = es.SimpleEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05}) 63 | # resulting matrix array is returned as result 64 | result = myEstimator.fit(data) 65 | # confidence levels are stored with the estimator 66 | myEstimator.summary() 67 | 68 | # Step 4 69 
| # Review numerical results 70 | print("Step 4") 71 | myMatrix = tm.TransitionMatrix(result) 72 | myMatrix.print_matrix() 73 | 74 | # In the LendingClub example we need to fix some matrix rows 75 | # because there are no state_IN observations besides initial grade assignment 76 | myMatrix[7, 9] = 1.0 77 | myMatrix[8, 9] = 1.0 78 | myMatrix[9, 9] = 1.0 79 | print(myMatrix.validate()) 80 | print(myMatrix.characterize()) 81 | myMatrix.print_matrix() 82 | 83 | 84 | def main(): 85 | print("Done") 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /docs/source/cohorts.rst: -------------------------------------------------------------------------------- 1 | Cohorts 2 | =================== 3 | 4 | Organizing data in `cohorts `_ can be an important step in understanding transition data or towards applying a :ref:`cohort estimator`. Cohorts in this context are understood as the grouping of entities within a temporal interval. 5 | 6 | For example, in a credit rating analysis context, cohorts could be groups of annual observations. The implication of cohorting data is that the more granular information embedded in a more precise timestamp is not relevant. It is also possible that input data are only available in cohort form (when the precise timestamp information is not recorded at the source) 7 | 8 | 9 | .. note:: Cohorting can bias the estimation in various subtle ways, so it is important that any procedure is well documented. 10 | 11 | 12 | 13 | Cohorting Utilities 14 | -------------------- 15 | 16 | Cohorting utilities are part of :ref:`preprocessing`. Presently the core algorithm is implemented in :func:`transitionMatrix.utils.preprocessing.bin_timestamps`. 
17 | 18 | 19 | 20 | 21 | 22 | Intermediate Cohort Data Formats 23 | ------------------------------------------- 24 | 25 | The cohort data format is a tabular representation of time series data that records the states (measurements) of multiple entities. Its defining characteristic is that each table row contains data pertaining to one entity at one point in time. 26 | 27 | The *canonical form* used as input to duration based estimators uses normalized timestamps (from 0 to T_max, where T_max is the last timepoint) and looks as follows: 28 | 29 | +----+------+------+----+ 30 | | ID | Time | From | To | 31 | +----+------+------+----+ 32 | | 1 | 1.1 | 0 | 1 | 33 | +----+------+------+----+ 34 | | 1 | 2.0 | 1 | 2 | 35 | +----+------+------+----+ 36 | | 1 | 3.4 | 2 | 3 | 37 | +----+------+------+----+ 38 | | 1 | 4.0 | 3 | 2 | 39 | +----+------+------+----+ 40 | | 2 | 1.2 | 0 | 1 | 41 | +----+------+------+----+ 42 | | 2 | 2.4 | 1 | 2 | 43 | +----+------+------+----+ 44 | | 2 | 3.5 | 2 | 3 | 45 | +----+------+------+----+ 46 | 47 | Cohorting Examples 48 | --------------------- 49 | 50 | 51 | Cohorting Example 1 52 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 53 | 54 | An example with limited data (dataset contains only one entity). It is illustrated in script examples/python/matrix_from_duration_data.py with example flag set to 1. Input data set is synthetic_data1.csv 55 | 56 | The state space is as follows (for brevity we work directly with the integer representation) 57 | 58 | .. code:: 59 | 60 | [('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")] 61 | 62 | The cohorting algorithm that assigns the last state to the cohort results in the following table. We notice that there is a lot of movement inside each cohort (high count) and that only two of the states are represented at the cohort level (0 and 1). 63 | 64 | .. 
code:: 65 | 66 | ID Cohort State Time Count 67 | 0 0 0 0 2.061015 21.0 68 | 1 0 1 1 4.400105 14.0 69 | 2 0 2 0 6.665899 28.0 70 | 3 0 3 0 8.842277 14.0 71 | 4 0 4 0 11.111733 21.0 72 | 5 0 5 0 11.182184 2.0 73 | 74 | -------------------------------------------------------------------------------- /examples/python/credit_curves.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """ Compute and Visualize credit curves 17 | 18 | """ 19 | 20 | import matplotlib.pyplot as plt 21 | 22 | import transitionMatrix as tm 23 | from transitionMatrix.creditratings.predefined import Generic 24 | 25 | # Initialize a single period transition matrix 26 | # Generic is a Typical Credit Rating Transition Matrix with seven rating states and one absorbing (Default) state 27 | 28 | print("> Load the generic transition matrix") 29 | M = tm.TransitionMatrix(values=Generic) 30 | # Lets take a look at the values 31 | M.print_matrix() 32 | M.validate() 33 | 34 | # The size of the rating scale 35 | Ratings = M.dimension 36 | 37 | # The Default (absorbing state) 38 | Default = Ratings - 1 39 | 40 | # Lets extend the matrix into ten periods (assume they represent annual intervals) 41 | # We do this using the power method 42 | Periods = 10 43 | print("> Extend the matrix into 10 periods using the power method") 44 | T = tm.TransitionMatrixSet(values=M, periods=Periods, method='Power', temporal_type='Cumulative') 45 | 46 | # Lets take a look at what we have created 47 | print("> Display the calculated transition matrix set") 48 | T.print_matrix() 49 | 50 | # Now lets compute the default curves 51 | # We do this one initial rating state at a time 52 | 53 | # For example for the best rating (least likely to default) we obtain 54 | print("> Compute the default curves") 55 | incremental_PD, cumulative_PD, hazard_Rate, survival_Rate = T.default_curves(0) 56 | 57 | # Construct a credit curve set 58 | credit_curves = T.default_curve_set() 59 | credit_curves.print_curve(accuracy=5) 60 | 61 | # Now lets plot a collection of curves for all ratings 62 | print("> Plot the default curves") 63 | 64 | curves = [] 65 | periods = range(0, Periods) 66 | 67 | for ri in range(0, Ratings - 1): 68 | print("RI: ", ri) 69 | iPD, cPD, hR, sR = T.default_curves(ri) 70 | # for k in range(0, Periods): 71 | # value = cPD[k] 72 | # line = [(k, value), (k + 1.0, value)] 73 | 
curves.append(cPD) 74 | 75 | fig, ax = plt.subplots() 76 | for ri in range(0, Ratings - 1): 77 | ax.plot(periods, curves[ri], label="RI=%d" % (ri,)) 78 | 79 | ax.autoscale() 80 | ax.margins(0.1) 81 | ax.set_xlabel("Periods") 82 | ax.set_ylabel("Cumulative Default Probability") 83 | ax.grid(True) 84 | plt.title("Credit Curves of Generic Transition Matrix") 85 | 86 | leg = plt.legend(loc='best', ncol=2, mode="expand", shadow=True, fancybox=True) 87 | leg.get_frame().set_alpha(0.5) 88 | 89 | plt.savefig("credit_curves.png") 90 | 91 | 92 | def main(): 93 | print("Done") 94 | 95 | 96 | if __name__ == "__main__": 97 | main() 98 | -------------------------------------------------------------------------------- /tests/test_roundtrip.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import unittest 16 | 17 | import pandas as pd 18 | 19 | import transitionMatrix as tm 20 | from transitionMatrix import source_path 21 | from transitionMatrix.estimators import cohort_estimator as es 22 | from transitionMatrix.generators import dataset_generators 23 | from transitionMatrix.utils import to_canonical 24 | from transitionMatrix.utils.converters import to_compact 25 | 26 | ACCURATE_DIGITS = 7 27 | 28 | Identity = [ 29 | [1.0, 0.0, 0.0, 0.0], 30 | [0.0, 1.0, 0.0, 0.0], 31 | [0.0, 0.0, 1.0, 0.0], 32 | [0.0, 0.0, 0.0, 1.0] 33 | ] 34 | 35 | dataset_path = source_path + "datasets/" 36 | 37 | class TestRoundTrip(unittest.TestCase): 38 | """ 39 | Round-trip testing: Identity Matrix Markov Chain 40 | Generate some trivial identity migrations 41 | Check that the cohort estimator computes identity 42 | 43 | """ 44 | 45 | def test_roundtrip_identity(self): 46 | definition = [('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")] 47 | myState = tm.StateSpace(definition) 48 | input_data = dataset_generators.long_format(myState, Identity, n=100, timesteps=2, mode='Canonical') 49 | compact_data = to_compact(input_data) 50 | cohort_data, cohort_bounds = tm.utils.bin_timestamps(compact_data, cohorts=1) 51 | sorted_data = cohort_data.sort_values(['ID', 'Time'], ascending=[True, True]) 52 | myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds, 53 | ci={'method': 'goodman', 'alpha': 0.05}) 54 | result = myEstimator.fit(sorted_data, labels={'Time': 'Time', 'State': 'State', 'ID': 'ID'}) 55 | myMatrix = tm.TransitionMatrix(myEstimator.average_matrix) 56 | 57 | self.assertAlmostEqual(myMatrix[0, 0], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None) 58 | self.assertAlmostEqual(myMatrix[1, 1], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None) 59 | self.assertAlmostEqual(myMatrix[2, 2], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None) 60 | self.assertAlmostEqual(myMatrix[3, 3], 1.0, places=ACCURATE_DIGITS, msg=None, delta=None) 61 | 62 | """ 63 | 
Round-trip testing: Data Formats 64 | Load a data set in compact format 65 | Convert to canonical, back to compact and compare 66 | 67 | """ 68 | 69 | def test_roundtrip_formats(self): 70 | input_data = pd.read_csv(dataset_path + 'rating_data.csv') 71 | canonical_data = to_canonical(input_data) 72 | compact_data = to_compact(canonical_data) 73 | 74 | self.assertEqual(len(compact_data.compare(input_data)), 0, msg=None) 75 | 76 | 77 | if __name__ == "__main__": 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /examples/python/example_list.csv: -------------------------------------------------------------------------------- 1 | Script Name,Flag,Input Data,Description 2 | adjust_nr_state.py,1,,Adjust the NR (not-rated) statistics. 3 | adjust_nr_state.py,2,,Adjust the NR (not-rated) statistics. 4 | credit_curves.py,,,Compute and Visualize credit curves 5 | characterize_datasets.py,,,Load the available datasets and compute various statistics 6 | compare_estimators.py,,synthetic_data4.csv,Compare the cohort and aalen-johansen estimators on a discrete timestep sample 7 | data_cleaning_example.py,,rating_data_raw.csv,Prepare transition data sets (data cleansing) using some provided methods 8 | deterministic_paths.py,,,Create a transition dataset by replicating given trajectories through a graph 9 | empirical_transition_matrix.py,1,synthetic_data7.csv,Credit Rating Migration example 10 | empirical_transition_matrix.py,2,synthetic_data8.csv,Simple 2x2 Matrix for testing 11 | empirical_transition_matrix.py,3,synthetic_data9.csv,Credit Rating Migration example with timestamps in raw date format 12 | estimate_matrix.py,,rating_data.csv,An end-to-end example of estimating a credit rating matrix from historical data 13 | fix_multiperiod_matrix.py,,sp_1981-2016.csv,Detect and solve various pathologies that might be affecting transition matrix data 14 | generate_full_multiperiod_set.py,,sp_NR_adjusted.json,Use infinitesimal 
generator methods to generate a full multi-period matrix set. 15 | generate_synthetic_data.py,1,, Generate synthetic data. The first set of examples produces duration type data. 16 | generate_synthetic_data.py,2,,The second set of examples produces cohort type data using markov chain simulation 17 | generate_synthetic_data.py,3,,The second set of examples produces cohort type data using markov chain simulation 18 | generate_visuals.py,6,JLT.json,Plot Transition Probabilities 19 | generate_visuals.py,7,JLT.json,Logarithmic Sankey Diagram of Credit Migration Rates 20 | generate_visuals.py,5,scenario_data.csv,Plot Entity Transitions Plot 21 | generate_visuals.py,1,synthetic_data1.csv,Step Plot of a single observation 22 | generate_visuals.py,4,synthetic_data3.csv,Entity Transitions Plot 23 | generate_visuals.py,2,synthetic_data4.csv,Step Plot of individual observations 24 | generate_visuals.py,3,synthetic_data5.csv,Histogram Plots of transition frequencies 25 | matrix_from_cohort_data.py,3,synthetic_data4.csv,S&P Style Credit Rating Migration Matrix 26 | matrix_from_cohort_data.py,2,synthetic_data5.csv,IFRS 9 Style Migration Matrix (Large sample for testing) 27 | matrix_from_cohort_data.py,1,synthetic_data6.csv,Simplest Absorbing Case for validation 28 | matrix_from_duration_data.py,1,synthetic_data1.csv,Duration example with limited data (dataset contains only one entity) 29 | matrix_from_duration_data.py,2,synthetic_data2.csv,"Duration example n entities with ~10 observations each, [0,1] state, 50%/50% transition matrix" 30 | matrix_from_duration_data.py,3,synthetic_data3.csv, 31 | matrix_lendingclub.py,,,Estimate a matrix from LendingClub data. Input data are in a special cohort format as the published datasets have some limitations 32 | matrix_operations.py,,,Perform various transition matrix operations illustrating the matrix algebra 33 | matrix_set_lendingclub.py,,,Estimate a matrix from LendingClub data. 
Input data are in a special cohort format as the published datasets have some limitations 34 | matrix_set_operations.py,,,Perform operations with multi-period transition matrix sequences 35 | state_space_operations.py,,,Examples working with state spaces (mappings) 36 | -------------------------------------------------------------------------------- /datasets/sp 2017.csv: -------------------------------------------------------------------------------- 1 | AAA,87.05,5.78,2.56,0.69,0.16,0.24,0.13,0,0.05,0,0.03,0.05,0,0,0.03,0,0.05,0 2 | AA+,2.42,77.53,11.54,3.78,0.76,0.4,0.2,0.05,0.1,0.05,0,0,0,0,0,0,0,0 3 | AA,0.44,1.29,80.25,8.71,2.83,1.21,0.39,0.4,0.13,0.08,0.05,0.03,0.02,0.02,0,0.02,0.05,0.02 4 | AA-,0.04,0.12,3.97,78.01,10.07,2.34,0.61,0.28,0.16,0.07,0.03,0,0,0.03,0.09,0,0,0.03 5 | A+,0,0.06,0.48,4.58,77.51,9.1,2.29,0.66,0.35,0.09,0.06,0.1,0.01,0.07,0.03,0,0,0.05 6 | A,0.04,0.05,0.24,0.46,5.26,78.04,7.04,2.57,0.93,0.29,0.12,0.11,0.08,0.1,0.02,0,0.02,0.06 7 | A-,0.04,0.01,0.07,0.17,0.48,6.72,76.84,7.62,2.22,0.62,0.15,0.15,0.13,0.12,0.03,0.01,0.03,0.07 8 | BBB+,0,0.01,0.06,0.07,0.23,0.86,7.26,74.4,8.41,1.8,0.41,0.34,0.15,0.18,0.12,0.03,0.07,0.12 9 | BBB,0.01,0.01,0.05,0.03,0.11,0.34,1.12,7.68,75.01,6.41,1.41,0.66,0.3,0.25,0.13,0.04,0.06,0.17 10 | BBB-,0.01,0.01,0.02,0.05,0.06,0.16,0.31,1.26,9.11,71.63,5.85,2.18,0.92,0.41,0.25,0.17,0.23,0.26 11 | BB+,0.05,0,0,0.03,0.02,0.1,0.08,0.46,1.84,11.51,63.56,7.8,2.95,1.04,0.65,0.26,0.43,0.36 12 | BB,0,0,0.04,0.01,0,0.07,0.05,0.19,0.56,2.26,9.67,64.74,8.13,2.34,1.07,0.35,0.6,0.58 13 | BB-,0,0,0,0.01,0.01,0.01,0.05,0.11,0.25,0.39,1.87,9.34,63.09,8.64,3.19,0.83,0.75,1.05 14 | B+,0,0.01,0,0.03,0,0.03,0.07,0.05,0.06,0.12,0.31,1.51,8.07,63.14,8.91,2.55,1.76,2.15 15 | B,0,0,0.01,0.01,0,0.04,0.05,0.02,0.07,0.04,0.14,0.26,1.28,7.94,61.36,8.55,4.17,3.89 16 | B-,0,0,0,0,0.02,0.04,0,0.08,0.06,0.12,0.1,0.18,0.47,2.32,10.16,53.36,11.77,7.49 17 | CCC,0,0,0,0,0.03,0,0.1,0.06,0.06,0.06,0.03,0.16,0.44,1.08,2.73,9.11,43.97,26.78 18 | 
,,,,,,,,,,,,,,,,,, 19 | ,,,,,,,,,,,,,,,,,, 20 | AAA,0.8705,0.0578,0.0256,0.0069,0.0016,0.0024,0.0013,0,0.0005,0,0.0003,0.0005,0,0,0.0003,0,0.0005,0 21 | AA+,0.0242,0.7753,0.1154,0.0378,0.0076,0.004,0.002,0.0005,0.001,0.0005,0,0,0,0,0,0,0,0 22 | AA,0.0044,0.0129,0.8025,0.0871,0.0283,0.0121,0.0039,0.004,0.0013,0.0008,0.0005,0.0003,0.0002,0.0002,0,0.0002,0.0005,0.0002 23 | AA-,0.0004,0.0012,0.0397,0.7801,0.1007,0.0234,0.0061,0.0028,0.0016,0.0007,0.0003,0,0,0.0003,0.0009,0,0,0.0003 24 | A+,0,0.0006,0.0048,0.0458,0.7751,0.091,0.0229,0.0066,0.0035,0.0009,0.0006,0.001,0.0001,0.0007,0.0003,0,0,0.0005 25 | A,0.0004,0.0005,0.0024,0.0046,0.0526,0.7804,0.0704,0.0257,0.0093,0.0029,0.0012,0.0011,0.0008,0.001,0.0002,0,0.0002,0.0006 26 | A-,0.0004,0.0001,0.0007,0.0017,0.0048,0.0672,0.7684,0.0762,0.0222,0.0062,0.0015,0.0015,0.0013,0.0012,0.0003,0.0001,0.0003,0.0007 27 | BBB+,0,0.0001,0.0006,0.0007,0.0023,0.0086,0.0726,0.744,0.0841,0.018,0.0041,0.0034,0.0015,0.0018,0.0012,0.0003,0.0007,0.0012 28 | BBB,0.0001,0.0001,0.0005,0.0003,0.0011,0.0034,0.0112,0.0768,0.7501,0.0641,0.0141,0.0066,0.003,0.0025,0.0013,0.0004,0.0006,0.0017 29 | BBB-,0.0001,0.0001,0.0002,0.0005,0.0006,0.0016,0.0031,0.0126,0.0911,0.7163,0.0585,0.0218,0.0092,0.0041,0.0025,0.0017,0.0023,0.0026 30 | BB+,0.0005,0,0,0.0003,0.0002,0.001,0.0008,0.0046,0.0184,0.1151,0.6356,0.078,0.0295,0.0104,0.0065,0.0026,0.0043,0.0036 31 | BB,0,0,0.0004,0.0001,0,0.0007,0.0005,0.0019,0.0056,0.0226,0.0967,0.6474,0.0813,0.0234,0.0107,0.0035,0.006,0.0058 32 | BB-,0,0,0,0.0001,0.0001,0.0001,0.0005,0.0011,0.0025,0.0039,0.0187,0.0934,0.6309,0.0864,0.0319,0.0083,0.0075,0.0105 33 | B+,0,0.0001,0,0.0003,0,0.0003,0.0007,0.0005,0.0006,0.0012,0.0031,0.0151,0.0807,0.6314,0.0891,0.0255,0.0176,0.0215 34 | B,0,0,0.0001,0.0001,0,0.0004,0.0005,0.0002,0.0007,0.0004,0.0014,0.0026,0.0128,0.0794,0.6136,0.0855,0.0417,0.0389 35 | B-,0,0,0,0,0.0002,0.0004,0,0.0008,0.0006,0.0012,0.001,0.0018,0.0047,0.0232,0.1016,0.5336,0.1177,0.0749 36 | 
CCC,0,0,0,0,0.0003,0,0.001,0.0006,0.0006,0.0006,0.0003,0.0016,0.0044,0.0108,0.0273,0.0911,0.4397,0.2678 37 | -------------------------------------------------------------------------------- /docs/source/basic_operations.rst: -------------------------------------------------------------------------------- 1 | Basic Operations 2 | ======================== 3 | 4 | The core TransitionMatrix object implements a typical (one period) transition matrix. It supports a variety of operations (more details are documented in the API section) 5 | 6 | - Initialize a matrix (from data, predefined matrices etc) 7 | - Validate a matrix 8 | - Attempt to fix a matrix 9 | - Compute generators, powers etc. 10 | - Print a matrix 11 | - Output to json/csv/xlsx formats 12 | - Output to html format 13 | 14 | 15 | Simple Operation Examples 16 | ---------------------------------------- 17 | 18 | .. note:: The script examples/python/matrix_operations.py contains the below and plenty more simple single matrix examples 19 | 20 | 21 | Initialize a matrix with values 22 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 23 | 24 | There is a growing list of ways to initialize a transition matrix 25 | 26 | * Initialize a generic matrix of dimension n 27 | * Any list can be used for initialization (but not all shapes are valid transition matrices!) 28 | * Any numpy array can be used for initialization (but not all are valid transition matrices!) 29 | * Values can be loaded from json or csv files 30 | * The transitionMatrix.creditratings.predefined module includes a number of predefined matrices 31 | 32 | 33 | .. 
code:: 34 | 35 | A = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]]) 36 | print(A) 37 | A.print_matrix(format_type='Standard', accuracy=2) 38 | 39 | [[0.6 0.2 0.2] 40 | [0.2 0.6 0.2] 41 | [0.2 0.2 0.6]] 42 | 43 | 0.60 0.20 0.20 44 | 0.20 0.60 0.20 45 | 0.20 0.20 0.60 46 | 47 | A.print_matrix(format_type='Percent', accuracy=1) 48 | 49 | 60.0% 20.0% 20.0% 50 | 20.0% 60.0% 20.0% 51 | 20.0% 20.0% 60.0% 52 | 53 | Both the intrinsic print function and the specific print_matrix method will print the matrix, but print_matrix aims to present the values in a more legible format (e.g. a fixed number of decimals or percentages). 54 | 55 | 56 | General Matrix Algebra 57 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 58 | .. note:: All standard numerical matrix operations are available as per the numpy API. 59 | 60 | Some example operations that leverage the underlying numpy API: 61 | 62 | .. code:: 63 | 64 | E = tm.TransitionMatrix(values=[[0.75, 0.25], [0.0, 1.0]]) 65 | print(E.validate()) 66 | # ATTRIBUTES 67 | # Getting matrix info (dimensions, shape) 68 | print(E.ndim) 69 | print(E.shape) 70 | # Obtain the matrix transpose 71 | print(E.T) 72 | # Obtain the matrix inverse 73 | print(E.I) 74 | # Summation methods: 75 | # - along columns 76 | print(E.sum(0)) 77 | # - along rows 78 | print(E.sum(1)) 79 | # Multiplying all elements of a matrix by a scalar 80 | print(0.01 * A) 81 | # Transition Matrix algebra is very intuitive 82 | print(A * A) 83 | print(A ** 2) 84 | print(A ** 10) 85 | 86 | 87 | Validating, Fixing and Characterizing a matrix 88 | ----------------------------------------------------------- 89 | 90 | Validate a Matrix 91 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 92 | 93 | The validate() method of the object checks for required properties of a valid transition matrix: 94 | 95 | 1. check squareness 96 | 2. check that all values are probabilities (between 0 and 1) 97 | 3. check that all rows sum to one 98 | 99 | .. 
code:: 100 | 101 | C = tm.TransitionMatrix(values=[1.0, 3.0]) 102 | print(C.validate()) 103 | 104 | [('Matrix Dimensions Differ: ', (1, 2))] 105 | 106 | 107 | Characterise a Matrix 108 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 109 | 110 | The characterise() method attempts to characterise a matrix 111 | 112 | 1. diagonal dominance -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/transitionmatrix/badge/?version=latest)](https://transitionmatrix.readthedocs.io/en/latest/?badge=latest) 2 | ![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg) 3 | [![GitHub license](https://img.shields.io/github/license/Naereen/StrapDown.js.svg)](https://github.com/Naereen/StrapDown.js/blob/master/LICENSE) 4 | [![Percentage of issues still open](http://isitmaintained.com/badge/open/Naereen/badges.svg)](http://isitmaintained.com/project/Naereen/badges "Percentage of issues still open") 5 | 6 | 7 | Intro 8 | ========================= 9 | transitionMatrix is a Python powered library for the statistical analysis and visualization of state transition phenomena. It can be used to analyze any dataset that captures timestamped transitions in a discrete state space. Use cases include credit rating transitions, system state event logs etc. 10 | 11 | You can use transitionMatrix to 12 | 13 | - Estimate transition matrices from historical event data using a variety of estimators 14 | - Manipulate transition matrices (generators, comparisons etc.) 
15 | - Visualize event data and transition matrices 16 | - Provide standardized data sets for testing 17 | - Model transitions using threshold processes 18 | - Map credit ratings using mapping tables between popularly used rating systems 19 | 20 | Key Information 21 | ================ 22 | 23 | * Author: [Open Risk](http://www.openriskmanagement.com) 24 | * License: Apache 2.0 25 | * Code Documentation: [Read The Docs](https://transitionmatrix.readthedocs.io/en/latest/index.html) 26 | * Mathematical Documentation: [Open Risk Manual](https://www.openriskmanual.org/wiki/Transition_Matrix) 27 | * Development website: [Github](https://github.com/open-risk/transitionMatrix) 28 | * Project Chat: [Open Risk Commons](https://www.openriskcommons.org/c/open-source/transitionmatrix/15) 29 | 30 | **NB: transitionMatrix is still in active development. If you encounter issues or have suggestions please raise them in our github repository or come discuss at our discourse server** 31 | 32 | Support and Training 33 | ========================= 34 | 35 | * The Open Risk Academy has free courses demonstrating the use of the library. The current list is: 36 | * [Analysis of Credit Migration using Python TransitionMatrix](https://www.openriskacademy.com/course/view.php?id=38) 37 | * Support for transitionMatrix and other open source libraries developed by [Open Risk](https://www.openriskmanagement.com) is available upon request 38 | 39 | 40 | Examples 41 | ======== 42 | 43 | The [code documentation](https://transitionmatrix.readthedocs.io/en/latest/index.html) includes a large number of examples, jupyter notebooks and more. 
44 | 45 | 46 | Plotting individual transition trajectories 47 | 48 | ![single entity](examples/single_entity.png) 49 | 50 | Sampling transition data 51 | 52 | ![sampled histories](examples/sampled_histories.png) 53 | 54 | Estimation of transition matrices using cohort methods 55 | 56 | ![estimation](examples/estimation.png) 57 | 58 | Estimation of transition matrices using duration methods 59 | 60 | ![transition probabilities](examples/transition_probabilities.png) 61 | 62 | Visualization of a transition matrix 63 | 64 | ![transition matrix](examples/TransitionMatrix.png) 65 | 66 | Visualization using a Logarithmic Sankey diagram 67 | 68 | ![logarithmic sankey](examples/sankey.png) 69 | 70 | Generating stochastic process transition thresholds 71 | 72 | ![thresholds](../portfolioAnalytics/examples/Thresholds.png) 73 | 74 | Stressing Transition Matrices 75 | 76 | ![stressing transition matrices](../portfolioAnalytics/examples/stressed_density.png) 77 | 78 | Computation and Visualization of Credit Curves 79 | 80 | ![credit curves](examples/credit_curves.png) 81 | 82 | Working with credit states 83 | 84 | ![image](examples/scale_conversions.png) 85 | 86 | -------------------------------------------------------------------------------- /tests/test_cohort_estimator.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under 11 | # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import pandas as pd

import transitionMatrix as tm
from transitionMatrix import source_path
from transitionMatrix.estimators import cohort_estimator as es

ACCURATE_DIGITS = 2


class TestSimpleEstimator(unittest.TestCase):
    pass


class TestCohortEstimator(unittest.TestCase):
    """
    Test the estimation of a simple 3x3 transition matrix with absorbing state

    .. note: The result is subject to sampling error! Ensure the required accuracy corresponds to the input data size

    """

    @staticmethod
    def _fit_cohort_estimator():
        """
        Load the synthetic dataset and fit a cohort estimator on it

        :return: tuple (raw dataframe as read from disk, fitted estimator)

        """
        raw_data = pd.read_csv(source_path + "datasets/" + 'synthetic_data5.csv')
        state_space = tm.StateSpace([('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")])
        estimator = es.CohortEstimator(states=state_space, cohort_bounds=[0, 1, 2, 3, 4],
                                       ci={'method': 'goodman', 'alpha': 0.05})
        # The estimator is fed the events ordered by entity and then by time
        estimator.fit(raw_data.sort_values(['ID', 'Time'], ascending=[True, True]))
        return raw_data, estimator

    def test_cohort_estimator_counts(self):
        """
        Test that the total counts constructed by the estimator is the same as the event count in the dataset

        """
        raw_data, estimator = self._fit_cohort_estimator()
        self.assertEqual(raw_data['ID'].count(), estimator.counts)

    def test_cohort_estimator_matrix(self):
        """
        Test that the estimated matrix is same as the matrix that was used to generate the data

        matrix = [[0.8, 0.15, 0.05],
                  [0.1, 0.7, 0.2],
                  [0.0, 0.0, 1.0]]

        """
        expected_matrix = ((0.8, 0.15, 0.05),
                           (0.1, 0.7, 0.2),
                           (0.0, 0.0, 1.0))
        _, estimator = self._fit_cohort_estimator()
        estimated = estimator.average_matrix
        # Compare element by element, row-major, within ACCURATE_DIGITS decimal places
        for row_index, expected_row in enumerate(expected_matrix):
            for col_index, expected_value in enumerate(expected_row):
                self.assertAlmostEqual(estimated[row_index, col_index], expected_value,
                                       places=ACCURATE_DIGITS, msg=None, delta=None)
# See the License for the specific language governing permissions and
# limitations under the License.


""" Example of using the transitionMatrix data generator methods to generate a full multi-period matrix set
The input data are processed Standard and Poor's matrices for a selection of cumulative observation points

.. note:: This example requires a substantial amount of custom code!

"""

from scipy.linalg import expm

import transitionMatrix as tm
from transitionMatrix import source_path

dataset_path = source_path + "datasets/"

print("> Loading multi-period transitional matrices (cumulative mode) from json file")
SnP_Set0 = tm.TransitionMatrixSet(json_file=dataset_path + "sp_NR_adjusted.json", temporal_type='Cumulative')
print("> Validate")
print(SnP_Set0.validate())

print("> Set the timesteps at which we have matrix observations")
# The 15 and 20 year observation points are left out: they need further processing
SnP_Set0.timesteps = [1, 2, 3, 5, 7, 10]
print(SnP_Set0.timesteps)

# Result container: one matrix per period up to the last observed timestep
timesteps = SnP_Set0.timesteps[-1]
SnP = tm.TransitionMatrixSet(dimension=8, periods=timesteps)

print("> Fill in the gaps between periods")
t_list = SnP_Set0.timesteps
last_index = len(t_list) - 1
# TODO Assumption is that first entry starts at 1
# The first observed matrix is copied as period 1
ts = 1
SnP.entries[ts - 1] = SnP_Set0.entries[0]
for i, _ in enumerate(t_list):
    if i == last_index:
        # Final observation: store it at the last period
        ts = timesteps
        SnP.entries[ts - 1] = SnP_Set0.entries[i]
        continue

    gap = t_list[i + 1] - t_list[i]
    if gap <= 1:
        # Consecutive observations: store the next matrix as is
        ts += 1
        SnP.entries[ts - 1] = SnP_Set0.entries[i + 1]
        continue

    # The gap spans several periods: derive the gap transition matrix from the
    # two surrounding cumulative matrices ("right" divided by "left")
    # NOTE(review): the product order (rm * lm.I, and gm * lm below) is kept as found - confirm the intended convention
    lm = SnP_Set0.entries[i]
    lm.fix_rowsums()
    rm = SnP_Set0.entries[i + 1]
    rm.fix_rowsums()
    # TODO Fix Negative probabilities for gap transition matrix
    q = rm * lm.I
    q.fix_rowsums()
    q.fix_negativerates()
    # The generator of the gap matrix provides the matrices for each period inside the gap
    G = q.generator(t=gap)
    for gap_period in range(1, gap + 1):
        gm = expm(gap_period * G)
        cm = gm * lm
        cm.fix_negativerates()
        ts += 1
        SnP.entries[ts - 1] = cm

SnP.timesteps = t_list
SnP.temporal_type = 'Cumulative'
SnP.print_matrix(accuracy=4)
# TODO Handle strictly zero transition probabilities
# TODO Handle non-monotonic transition probabilities
SnP.to_json(dataset_path + "sp_multiperiod.json", accuracy=8)


def main():
    """Report completion; the example itself runs when the module is loaded"""
    print("Done")


if __name__ == "__main__":
    main()
Feature requests, bug reports and any other issues are welcome and can be logged at the `Github Repository <https://github.com/open-risk/transitionMatrix>`_ 7 | 8 | Discussing general usage of the library is `happening here <https://www.openriskcommons.org/c/open-source/transitionmatrix/15>`_ 9 | 10 | 11 | 0.5 12 | -------------------------- 13 | Version 0.5 will be the next major release (still considered alpha) that will be available e.g. on PyPI 14 | 15 | 16 | 0.4.X 17 | -------------------------- 18 | 19 | The 0.4.X family of updates will focus on rounding out and (above all) documenting a number of functionalities already introduced 20 | 21 | 22 | Todo List 23 | ========================= 24 | 25 | A list of todo items, no triaging / prioritisation implied 26 | 27 | Core Architecture and API 28 | --------------------------------------------------- 29 | 30 | - Introduce exceptions / error handling throughout 31 | - Solve numpy.matrix deprecation (implement equivalent API in terms of ndarray) 32 | - Complete testing framework 33 | 34 | Input Data Preprocessing 35 | --------------------------------------------------- 36 | 37 | - Handling of Markov chain transition formats (single entity) 38 | - Native handling of Wide Data Formats (concrete data sets missing) 39 | - Generalize cohorting algorithm to user specified function 40 | 41 | Reference Data 42 | --------------------------------------------------- 43 | 44 | - Additional credit rating scales (e.g. short term ratings) 45 | - Integration with credit rating ontology 46 | 47 | 48 | Transition Matrix Analysis Functionality 49 | --------------------------------------------------- 50 | 51 | - Further validation and characterisation of transition matrices (mobility indexes) 52 | - Generate random matrix subject to constraints 53 | - Fixing common problems encountered by empirically estimated transition matrices 54 | 55 | Statistical Analysis Functionality 56 | --------------------------------------------------- 57 | 58 | - Aalen Johansen Estimator 59 | - Covariance calculation 60 | - Various other improvements / tests 61 | - Cohort Estimator 62 
| - Read Data by labels 63 | - Edge cases 64 | - Kaplan Meier Estimator NEW 65 | - (link to survival frameworks) 66 | - Duration based methods 67 | - Bootstrap based confidence intervals 68 | 69 | 70 | State Space package 71 | --------------------------------------------------- 72 | 73 | - Multiple absorbing states (competing risks) 74 | - Automated coarsening of states (merging of similar) 75 | 76 | Credit Rating Related 77 | --------------------------------------------------- 78 | - Import data defined according to CRO ontology 79 | - Absorbing State Identification, Competing Risks 80 | - Compute hazard rates 81 | - Characterize hazard rates 82 | 83 | 84 | Utilities 85 | --------------------------------------------------- 86 | 87 | - Continuous time data generation from arbitrary chain 88 | 89 | Further Refactoring of packages 90 | --------------------------------------------------- 91 | 92 | - Introduce visualization objects / API 93 | 94 | 95 | Performance / Big data 96 | --------------------------------------------------- 97 | 98 | - Handling very large data sets, moving away from in-memory processing 99 | 100 | 101 | Documentation 102 | --------------------------------------------------- 103 | - Sphinx documentation (complete) 104 | - Expand the jupyter notebook collection to (at least) match the standalone scripts 105 | 106 | Releases / Distribution 107 | --------------------------------------------------- 108 | 109 | - Adopt regular github/PyPI release schedule 110 | - Conda distribution 111 | 112 | -------------------------------------------------------------------------------- /examples/python/matrix_operations.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. 
# This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.


"""
Examples using transitionMatrix to perform various transition matrix operations.

The script is a linear walk-through: every step prints a heading followed by the result of the
operation. All steps run when the module is loaded; main() only reports completion.

"""

import numpy as np
from scipy.linalg import expm

import transitionMatrix as tm
from transitionMatrix import dataset_path
from transitionMatrix.creditratings.predefined import JLT

print("> Initialize a 3x3 matrix with values")
A = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]])
print(A)
# The same matrix printed in two formats: plain decimals and percentages
A.print_matrix(format_type='Standard', accuracy=2)
A.print_matrix(format_type='Percent', accuracy=1)

print("> Initialize a generic matrix of dimension n")
B = tm.TransitionMatrix(dimension=4)
print(B)

print("> Any list can be used for initialization (but not all shapes are valid transition matrices!)")
# A flat two-element list: deliberately not a valid (square) transition matrix, it is repaired further below
C = tm.TransitionMatrix(values=[1.0, 3.0])
print(C)

print("> Any numpy array can be used for initialization (but not all are valid transition matrices!)")
D = tm.TransitionMatrix(values=np.identity(5))
print(D)

print("> Values can be loaded from json or csv files")
F = tm.TransitionMatrix(json_file=dataset_path + "JLT.json")
print(F)

print("> Validate that a matrix satisfies probability matrix properties")
print(A.validate())
print(B.validate())
print(C.validate())
print(D.validate())
print(F.validate())

print("> All the numpy.matrix / ndarray functionality is available")
E = tm.TransitionMatrix(values=[[0.75, 0.25], [0.0, 1.0]])
print(E.validate())
# ATTRIBUTES
# Getting matrix info (dimensions, shape)
print(E.ndim)
print(E.shape)
# Obtain the matrix transpose
print(E.T)
# Obtain the matrix inverse
print(E.I)
# Summation methods:
# - along columns
print(E.sum(0))
# - along rows
print(E.sum(1))

print("> Multiplying all elements of a matrix by a scalar")
print(0.01 * A)

print("> Transition Matrix algebra is very intuitive")
print(A * A)
print(A ** 2)
print(A ** 10)

print("> Lets fix the invalid matrix C")
# numpy operations that return numpy arrays can be used as follows:
# the resized array is wrapped in a new TransitionMatrix and then three entries are overwritten
C = tm.TransitionMatrix(values=np.resize(C, (2, 2)))
C[0, 1] = 0.0
C[1, 0] = 0.0
C[1, 1] = 1.0
print(C.validate())

print("> Computing the generator of a transition matrix")
# Generator of A; printing A next to expm(G) lets the reader compare the two
G = A.generator()
print(A, expm(G))

print("> Transition matrices properties can be analyzed")
print(A.characterize())

print("> Lets look at a realistic example from the JLT paper")
# Reproduce JLT Generator
# We load it using different sources (predefined values, json file, csv file)
E = tm.TransitionMatrix(values=JLT)
# E_2 is loaded for illustration only, it is not used further in this script
E_2 = tm.TransitionMatrix(json_file=dataset_path + "JLT.json")
E_3 = tm.TransitionMatrix(csv_file=dataset_path + "JLT.csv")
# Lets check there are no errors: the difference between the predefined and the csv version
Error = E - E_3
print(np.linalg.norm(Error))
print("> Lets look at validation and generators")
# Empirical matrices will not satisfy constraints exactly
print(E.validate(accuracy=1e-3))
print(E.characterize())
print(E.generator())
# Difference between the matrix and the exponential of its generator
Error = E - expm(E.generator())
# Frobenius norm (the numpy default when no order is given)
print(np.linalg.norm(Error))
# 1-norm (for a matrix: the maximum absolute column sum)
print(np.linalg.norm(Error, 1))

print("> Use pandas style API for saving to files")
# The output files are written relative to the current working directory
E.to_csv("JLT.csv")
E.to_json("JLT.json")


def main():
    """Report completion; the examples above run when the module is loaded"""
    print("Done")


if __name__ == "__main__":
    main()
""" Converter utilities to help switch between various formats """

import pandas as pd
import numpy as np


def frame_to_array(dataframe):
    """frame_to_array() converts a dataframe in compact form into three numpy arrays

    :param dataframe: Pandas dataframe with 'ID', 'Time' and 'State' columns
    :return: tuple (entity_id, event_time, entity_state) with one element per dataframe row
        (note the order: the times come second)
    :rtype: tuple of numpy arrays (int, float, int)

    .. note:: An empty dataframe produces three empty arrays.

    """
    event_count = dataframe.shape[0]
    entity_id = np.empty(event_count, int)
    entity_state = np.empty(event_count, int)
    event_time = np.empty(event_count, float)

    for i, row in enumerate(dataframe.itertuples(index=False)):
        entity_id[i] = row.ID
        event_time[i] = row.Time
        entity_state[i] = row.State
    return entity_id, event_time, entity_state


def datetime_to_float(dataframe, time_column='Time', format=None):
    """datetime_to_float() converts dates from string format to the canonical float format

    The observation times are rescaled to the interval [0, 1]: the earliest date of the sample maps to 0.0
    and the latest date to 1.0. Only whole days are taken into account. If all observations fall on the
    same day every time is set to 0.0.

    :param dataframe: Pandas dataframe with dates in string format
    :param time_column: the column label of the observation times
    :param format: optional date format string that is passed on to the pandas to_datetime function
    :return: a list [start_date, end_date, total_days] and the Pandas dataframe with dates in float format
    :rtype: tuple

    .. note:: The date string must be recognizable by the pandas to_datetime function.

    .. note:: The time column of the dataframe that is passed in is overwritten (the returned dataframe
        is the same object).

    """

    # Parse element by element so that every entry is interpreted on its own
    dataframe[time_column] = dataframe[time_column].apply(
        lambda x: (pd.to_datetime(x, format=format)))

    # Find the start and end dates of the sample
    start_date = dataframe[time_column].min()
    end_date = dataframe[time_column].max()
    # Find the total days in the sample
    total_days = (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days

    # Apply the transformation
    # If total_days == 0 simply set to zero
    if total_days > 0:
        dataframe[time_column] = dataframe[time_column].apply(
            lambda x: (pd.to_datetime(x) - pd.to_datetime(start_date)).days / total_days)
    else:
        dataframe[time_column] = dataframe[time_column].apply(
            lambda x: 0.0
        )

    return [start_date, end_date, total_days], dataframe


def to_canonical(dataframe):
    """to_canonical() converts a dataframe that is in compact form into a canonical form

    Every row of the result holds the state the entity came from ('From') and the state it moved to ('To').
    The first event of each entity has no predecessor and is stored with From equal to To. Rows are
    attributed to the same entity only if they are adjacent, hence the input must be grouped by ID
    (and ordered in time within each ID).

    :param dataframe: Pandas dataframe in compact form ('ID', 'Time' and 'State' columns)
    :return: dataframe with columns 'ID', 'Time', 'From', 'To'

    .. note:: An empty dataframe produces an empty canonical dataframe.

    """

    entity_id, event_time, state = frame_to_array(dataframe)

    # By default an event starts from its own state (first event of an entity) ...
    from_state = state.copy()
    # ... unless the previous row belongs to the same entity, then it starts from the previous state.
    # The slices are empty for zero or one events, so no special casing is required.
    same_entity = entity_id[1:] == entity_id[:-1]
    from_state[1:] = np.where(same_entity, state[:-1], state[1:])

    return pd.DataFrame({'ID': entity_id, 'Time': event_time, 'From': from_state, 'To': state},
                        columns=['ID', 'Time', 'From', 'To'])


def to_compact(dataframe):
    """to_compact() converts a dataframe that is in canonical form into a compact form

    The 'From' column is dropped and the 'To' column is renamed to 'State'. The dataframe that is
    passed in is not modified.

    :param dataframe: Pandas dataframe in canonical form (with 'From' and 'To' columns)
    :return: dataframe

    """

    return dataframe.drop(['From'], axis=1).rename(columns={'To': 'State'})
27 | - Access standardized Datasets for testing 28 | - Extract and work with credit default curves (absorbing states) 29 | - Map credit ratings using mapping tables 30 | - More (still to be documented :-) 31 | 32 | Architecture 33 | ------------ 34 | 35 | * transitionMatrix provides intuitive objects for handling transition matrices individually and as sets (based on numpy arrays) 36 | * supports file input/output in json and csv formats 37 | * it has a powerful API for handling event data (based on pandas and numpy) 38 | * supports visualization using matplotlib 39 | 40 | 41 | Installation 42 | ======================= 43 | 44 | You can install and use the transitionMatrix package in any system that supports the `Scipy ecosystem of tools `_ 45 | 46 | Dependencies 47 | ----------------- 48 | 49 | - TransitionMatrix requires Python 3 (currently 3.7) 50 | - It depends on numerical and data processing Python libraries (Numpy, Scipy, Pandas). 51 | - The Visualization API depends on Matplotlib. 52 | - The precise dependencies are listed in the requirements.txt file. 53 | - TransitionMatrix may work with earlier versions of python / these packages but it is not tested. 54 | 55 | From PyPI 56 | ------------- 57 | 58 | .. code:: bash 59 | 60 | pip3 install transitionMatrix 61 | 62 | From sources 63 | ------------- 64 | 65 | Download the sources in your preferred directory: 66 | 67 | .. code:: bash 68 | 69 | git clone https://github.com/open-risk/transitionMatrix 70 | 71 | 72 | Using virtualenv 73 | ---------------- 74 | 75 | It is advisable to install the package in a virtualenv so as not to interfere with your system's python distribution 76 | 77 | .. code:: bash 78 | 79 | virtualenv -p python3 tm_test 80 | source tm_test/bin/activate 81 | 82 | If you do not have pandas already installed make sure you install it first (this will also install numpy and other required dependencies). 83 | 84 | .. 
code:: bash 85 | 86 | pip3 install -r requirements.txt 87 | 88 | Finally issue the install command and you are ready to go! 89 | 90 | .. code:: bash 91 | 92 | python3 setup.py install 93 | 94 | File structure 95 | ----------------- 96 | The distribution has the following structure: 97 | 98 | :: 99 | 100 | | transitionMatrix/ Directory with the library source code 101 | | -- model.py File with main data structures 102 | | -- estimators/ Directory with the estimator methods 103 | | -- statespaces/ Directory with state space objects and methods 104 | | -- creditratings/ Directory with predefined credit rating structures 105 | | -- generators/ Directory with data generator methods 106 | | -- utils/ Directory with helper classes and methods 107 | | -- examples/ Directory with usage examples 108 | | ---- python/ Examples as standalone python scripts 109 | | ---- notebooks/ Examples as jupyter notebooks 110 | | -- datasets/ Directory with a variety of datasets useful for getting started 111 | | -- tests/ Directory with the testing suite 112 | 113 | 114 | Other similar open source software 115 | ----------------------------------- 116 | 117 | - etm, an R package for estimating empirical transition matrices 118 | - msSurv, an R Package for Nonparametric Estimation of Multistate Models 119 | - msm, Multi-state modelling with R 120 | - mstate, competing risks and multistate models in R 121 | - lifelines, python survival package 122 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. 
class TestTransitionMatrix(unittest.TestCase):
    """Unit tests for the single-period ``TransitionMatrix`` object."""

    # Index pairs covering every entry of the 2x2 matrices used below.
    CELLS = ((0, 0), (0, 1), (1, 0), (1, 1))

    def assert_same_entries(self, first, second):
        """Assert that ``first`` and ``second`` agree on every 2x2 cell.

        Both arguments only need to support indexing with an ``(i, j)`` tuple,
        so matrices, arrays and ``{(i, j): value}`` dicts can be mixed freely.
        """
        for cell in self.CELLS:
            with self.subTest(cell=cell):
                self.assertAlmostEqual(first[cell], second[cell], places=ACCURATE_DIGITS)

    def test_instantiate_matrix(self):
        # Default instance (2x2 identity matrix)
        a = tm.TransitionMatrix()
        self.assert_same_entries(a, {(0, 0): 1.0, (0, 1): 0.0, (1, 0): 0.0, (1, 1): 1.0})

        # Explicit values are stored verbatim
        b = tm.TransitionMatrix([[1.0, 3.0], [1.0, 4.0]])
        self.assert_same_entries(b, {(0, 0): 1.0, (0, 1): 3.0, (1, 0): 1.0, (1, 1): 4.0})

    def test_csv_io(self):
        # Local imports: the round trip is done in a throw-away directory so that
        # running the suite does not leave a test.csv behind in the working directory.
        import os
        import tempfile

        a = tm.TransitionMatrix()
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, "test.csv")
            a.to_csv(path)
            b = tm.TransitionMatrix(csv_file=path)
        self.assert_same_entries(a, b)

    def test_json_io(self):
        # Same throw-away directory approach as in test_csv_io.
        import os
        import tempfile

        a = tm.TransitionMatrix()
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, "test.json")
            a.to_json(path)
            b = tm.TransitionMatrix(json_file=path)
        self.assert_same_entries(a, b)

    def test_validation(self):
        a = tm.TransitionMatrix()
        self.assertEqual(a.validate(), True)
        b = tm.TransitionMatrix(values=[1.0, 3.0])
        self.assertEqual(b.validate()[0][0], 'Matrix Dimensions Differ: ')
        c = tm.TransitionMatrix(values=[[0.75, 0.25], [0.0, 0.9]])
        self.assertEqual(c.validate()[0][0], 'Rowsum not equal to one: ')
        d = tm.TransitionMatrix(values=[[0.75, 0.25], [-0.1, 1.1]])
        self.assertEqual(d.validate()[0][0], 'Negative Probabilities: ')

    def test_generator(self):
        # exp(generator(A)) must reproduce A; the generator is computed once.
        a = tm.TransitionMatrix([[1.0, 3.0], [1.0, 4.0]])
        restored = expm(a.generator())
        self.assert_same_entries(a, restored)


class TestTransitionMatrixSet(unittest.TestCase):
    """Unit tests for the multi-period ``TransitionMatrixSet`` object."""

    def test_instantiate_matrix_set(self):
        periods = 5
        a = tm.TransitionMatrixSet(dimension=2, periods=periods)
        self.assertEqual(a.temporal_type, 'Incremental')
        self.assertAlmostEqual(a.entries[0][0, 0], 1.0, places=ACCURATE_DIGITS)
        self.assertAlmostEqual(a.entries[periods - 1][0, 0], 1.0, places=ACCURATE_DIGITS)

    def test_set_validation(self):
        a = tm.TransitionMatrixSet(dimension=2, periods=5)
        self.assertEqual(a.validate(), True)

    def test_set_cumulate_incremental(self):
        a = tm.TransitionMatrix(values=[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]])
        a_set = tm.TransitionMatrixSet(values=a, periods=3, method='Copy', temporal_type='Incremental')
        # Build an independent second set. Binding b_set to a_set (as before) made
        # the final assertion compare an object with itself, so it could never fail.
        b_set = tm.TransitionMatrixSet(values=a, periods=3, method='Copy', temporal_type='Incremental')
        b_set.cumulate()
        b_set.incremental()
        self.assertAlmostEqual(a_set.entries[2][0, 0], b_set.entries[2][0, 0], places=ACCURATE_DIGITS)

    def test_set_csv_io(self):
        # Placeholder: report as skipped rather than as a (vacuous) pass.
        self.skipTest("TransitionMatrixSet csv round trip test not implemented yet")

    def test_set_json_io(self):
        # Placeholder: report as skipped rather than as a (vacuous) pass.
        self.skipTest("TransitionMatrixSet json round trip test not implemented yet")
class SimpleEstimator(BaseEstimator):
    """
    Class for implementing a simple estimator suitable for single period transitions

    This is useful for testing, getting a first feel about the transition landscape.

    """

    def __init__(self, states=None, ci=None):
        """
        Parameters
        ----------
        states : state space object, optional
            Must expose a ``cardinality`` attribute (read by ``fit``).
        ci : dict, optional
            Confidence interval settings with the keys ``'method'`` and ``'alpha'``.
            When omitted no confidence intervals are computed.
        """
        BaseEstimator.__init__(self)

        if states is not None:
            self.states = states
        if ci is not None:
            # NOTE(review): 'binomial' is accepted here, but fit() forwards the method
            # unchanged to multinomial_proportions_confint, whose documented methods are
            # 'goodman' and 'sison-glaz' - confirm 'binomial' is usable with this estimator.
            assert (ci['method'] in ['goodman', 'sison-glaz', 'binomial'])
            self.ci_method = ci['method']
            self.ci_alpha = ci['alpha']

    def fit(self, data):
        """
        Estimate a single-period transition matrix from observed transitions.

        Parameters
        ----------
        data : pandas.DataFrame
            One row per observed transition. The initial and final states are read
            positionally from the third and fourth columns (``row[2]`` and ``row[3]``)
            and are used directly as integer indices into the state space.

        Returns
        -------
        matrix_set : list
            ``self.matrix_set`` with the newly estimated matrix appended
            (each call to ``fit`` appends one matrix).

        Notes
        ------

        * loop over data rows
        * calculate population count N^i per initial state i
        * calculate migrations count N^{ij} from i to j
        * calculate transition matrix as ratio T^{ij} = N^{ij} / N^i
        * rows of states that were never observed are left as zeros
        * the total number of observations is stored in ``self.counts``
        * if a confidence interval method is configured, the bounds are stored in
          ``self.confint_lower`` / ``self.confint_upper`` (they are not returned)

        """

        # In the simple estimator all events are part of the same cohort
        state_count = self.states.cardinality

        # storage for the population per initial state and the migration counts
        population = np.zeros(state_count)
        migrations = np.zeros((state_count, state_count))

        for row in data.itertuples(index=False):
            state_in = row[2]
            state_out = row[3]
            population[state_in] += 1
            migrations[state_in, state_out] += 1

        self.counts = int(population.sum())

        if self.ci_method:
            # Confidence intervals for multinomial proportions, computed row by row
            # on the raw migration counts (i.e. before normalization). See
            # http://www.statsmodels.org/devel/_modules/statsmodels/stats/proportion.html
            # The routine returns a 2-D array of [lower, upper] bounds per category.
            confint_lower = np.zeros((state_count, state_count, 1))
            confint_upper = np.zeros((state_count, state_count, 1))
            for s1 in range(state_count):
                intervals = np.asarray(
                    st.multinomial_proportions_confint(migrations[s1, :],
                                                       alpha=self.ci_alpha,
                                                       method=self.ci_method))
                confint_lower[s1, :, 0] = intervals[:, 0]
                confint_upper[s1, :, 0] = intervals[:, 1]
            self.confint_lower = confint_lower
            self.confint_upper = confint_upper

        # Normalization of counts to produce the probability matrix; rows without
        # any observation are skipped to avoid dividing by zero
        observed = population > 0
        migrations[observed] /= population[observed][:, np.newaxis]

        # We store and return the matrix in matrix set (but there is only one instance)
        self.matrix_set.append(migrations)

        return self.matrix_set
"""
Example workflows using transitionMatrix to estimate an empirical transition matrix from duration type data. The datasets are produced in examples/generate_synthetic_data.py

"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import transitionMatrix as tm
from transitionMatrix import source_path
from transitionMatrix.estimators import aalen_johansen_estimator as aj
from transitionMatrix.utils.converters import datetime_to_float

dataset_path = source_path + "datasets/"

# Available examples
#   1: Credit Rating Migration example
#   2: Simple 2x2 Matrix for testing
#   3: Credit Rating Migration example with timestamps in raw date format
example = 3

# Step 1: read the selected data set into a pandas frame.
# For examples 1 and 2 the state column is forced to string type (not integer).
print("> Step 1: Load the data set into a pandas frame")
if example == 1:
    data = pd.read_csv(dataset_path + 'synthetic_data7.csv', dtype={'State': str})
elif example == 2:
    data = pd.read_csv(dataset_path + 'synthetic_data8.csv', dtype={'State': str})
elif example == 3:
    data = pd.read_csv(dataset_path + 'synthetic_data9.csv', parse_dates=True)
    # turn the datetime column into floats; the observation window comes back as well
    bounds, data = datetime_to_float(data)
    print('Start and End dates', bounds)

sorted_data = data.sort_values(['Time', 'ID'], ascending=[True, True])
print(sorted_data.head(5))
print(sorted_data.describe())

# Step 2: declare the expected state space and check the data against it
print("> Step 2: Describe and validate the State Space against the data")
if example in (1, 3):
    definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
                  ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]
elif example == 2:
    definition = [('0', "G"), ('1', "B")]
myState = tm.StateSpace(definition)
myState.describe()
# Both the origin and the destination column must conform to the state space
for state_column in ('From', 'To'):
    print(myState.validate_dataset(dataset=sorted_data, labels={'State': state_column}))

# Step 3: run the Aalen-Johansen estimator
print("> Step 3: Estimate matrices using the Aalen-Johansen estimator")
myEstimator = aj.AalenJohansenEstimator(states=myState)
labels = {'Time': 'Time', 'From': 'From', 'To': 'To', 'ID': 'ID'}
etm, times = myEstimator.fit(sorted_data, labels=labels)

# Step 4: show the matrix at the last estimated time point
print("> Step 4: Print the cumulative computed matrix")
print(etm[:, :, -1])

# Step 5: one panel per initial rating, one curve per final rating
if example in (1, 3):
    print("> Plot the transition curves")

    Periods = 10
    Ratings = 8

    m = 4
    n = 2
    f, axarr = plt.subplots(m, n)
    f.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.90, wspace=0.0, hspace=0.1)

    for ri in range(Ratings):
        # panels are filled row by row on the m x n grid
        axj, axi = divmod(ri, 2)
        print(ri, axj, axi)
        panel = axarr[axj, axi]
        for rf in range(Ratings):
            panel.set_ylabel('State ' + str(ri), fontsize=12)
            panel.set_xlabel("Time")
            panel.plot(times[1:], etm[ri, rf, :], label="RI=%d" % (rf,))
            panel.set_yticks(np.linspace(0, 1, 5), minor=False)
            panel.margins(y=0.05, x=0.05)
            panel.autoscale()
            panel.grid(True)

    f.suptitle("Multi-period Transition Probabilities", fontsize=12)
    plt.savefig("transition_probabilities.png")
    plt.show()


def main():
    print("Done")


if __name__ == "__main__":
    main()
class BaseEstimator(object):

    """ Base class for implementing any transition matrix estimator

    Offers basic methods common to all estimators: storage for the state space,
    the estimated matrices, the underlying counts and the confidence interval
    settings, plus helpers to print the results.

    """

    def __init__(self):
        self.states = None
        self.matrix_set = []
        self.count_set = []
        self.count_normalization = []
        self.average_matrix = []
        self.ci_alpha = None
        self.ci_method = None
        self.confint_lower = None
        self.confint_upper = None
        self.counts = None
        self.nans = None

    def get_matrix_set(self):
        """Return the list of estimated matrices."""
        return self.matrix_set

    def print(self, select='Frequencies', period=None):
        """
        Pretty print the estimated transition matrices

        :param select: 'Counts' prints the starting and migration counts,
            'Frequencies' prints the estimated matrices; any other value prints nothing
        :param period: index of the single period to print; all periods when None
        :return: None
        """
        if select == 'Counts':
            periods = range(len(self.count_set)) if period is None else [period]
            for k in periods:
                print("Period: ", k)
                print("Starting Count: ")
                print(self.count_normalization[k])
                print("Migration Counts: ")
                print(self.count_set[k][:, :])
        elif select == 'Frequencies':
            periods = range(len(self.matrix_set)) if period is None else [period]
            for k in periods:
                print("Period: ", k)
                print(self.matrix_set[k][:, :])

    def summary(self, k=0):
        """
        Pretty-print a summary of estimation results (values and confidence intervals)

        :param k: index of the matrix (and of the confidence interval slice) to report
        :return: None

        The confidence bounds are only reported when a confidence interval
        method has been configured.
        """
        heavy_rule = '=============================================================================='
        light_rule = '------------------------------------------------------------------------------'
        dotted_rule = '..............................................................................'

        with_intervals = bool(self.ci_method)
        state_count = self.states.cardinality

        print(' Transition Matrix Estimation Results ')
        print(heavy_rule)
        if with_intervals:
            print('Confidence Level: ', self.ci_alpha)
            print('Confidence Level Method: ', self.ci_method)
            print(light_rule)
            print('Row Col Lower Bound Value Upper Bound')
        else:
            print('Row Col Value')

        for s1 in range(state_count):
            for s2 in range(state_count):
                cv = self.matrix_set[k][s1, s2]
                if with_intervals:
                    lv = self.confint_lower[s1, s2, k]
                    rv = self.confint_upper[s1, s2, k]
                    print('{0:3} {1:4} {2:12f} {3:10f} {4:12f}'.format(s1, s2, lv, cv, rv))
                else:
                    print('{0:3} {1:4} {2:10f}'.format(s1, s2, cv))
            # dotted rule closes the block of entries of each initial state
            print(dotted_rule)
        print(heavy_rule)


class DurationEstimator(BaseEstimator):

    """ Base class for implementing any duration based transition matrix estimator

    Offers methods common to all duration based estimators
    Two subclasses:

    * Time homogeneous estimator (constant transition rates)
    * Time inhomogeneous estimator (variable transition probabilities) Aalen-Johansen

    T(s, t) = T(0, t) (transition from start=0)
    Compute transition_times(k) T^ij(t) numpy(i,j,k)

    Transitions at cohort intervals
    Approximate numpy(i,j, k_index : largest k-value that is less than t(boundary))

    """

    def __init__(self, cohort_intervals=None, states=None):
        BaseEstimator.__init__(self)
        self.cohort_intervals = cohort_intervals
        # keep the base class default (None) unless a state space is supplied
        if states is not None:
            self.states = states
        self.timepoint_count = None
28 | * Documentation: Major expansion (Still incomplete) 29 | * Expanded Data Formats 30 | * Rating Scales, CQS etc 31 | * Listing all datasets and examples 32 | * Testing / Training: An interesting use case raised as issue #20 33 | * Added an end-to-end example of estimating a credit rating matrix from raw data 34 | * Includes various data preprocessing examples 35 | * Datasets: 36 | * rating_data.csv (cleaned up credit data) 37 | * synthetic_data10.csv Credit Rating Migrations in Long Format / Compact Form (for testing) 38 | * deterministic generator (replicate given trajectories) 39 | * Tests: 40 | * test_roundtrip.py testing via round-tripping methods 41 | 42 | 43 | v0.4.8 (07-02-2021) 44 | ------------------- 45 | 46 | * Documentation: Pulled all rst files in docs 47 | * Refactoring: credit rating data moved into separate module 48 | 49 | 50 | v0.4.7 (29-09-2020) 51 | ------------------- 52 | 53 | * Documentation: Expanded and updated description of classes 54 | * Documentation: Including Open Risk Academy code examples 55 | * Feature: logarithmic sankey visualization 56 | 57 | v0.4.6 (22-05-2019) 58 | ------------------- 59 | 60 | * Feature: Update of CQS Mappings, addition of new rating scales 61 | * Documentation: Documentation of rating scale structure and mappings 62 | * Training: Example of mapping portfolio data to CQS 63 | 64 | v0.4.5 (21-04-2019) 65 | ------------------- 66 | 67 | * Training: Monthly_from_Annual.ipynb (a Jupyter notebook illustrating how to obtain interpolated transition rates on monthly intervals) 68 | * Datasets: generic_monthly.json 69 | * Feature: print_matrix function for generic matrix pretty printing 70 | * Feature: matrix_exponent function for obtaining arbitrary integral matrices from a given generator 71 | 72 | v0.4.4 (03-04-2019) 73 | ------------------- 74 | 75 | * Documentation: Cleanup of docs following separation of threshold / portfolio models 76 | * Datasets: generic_multiperiod.json 77 | * Feature: CreditCurve class for 
holding credit curves 78 | 79 | 80 | v0.4.3 (29-03-2019) 81 | ------------------- 82 | 83 | * Refactoring: Significant rearrangement of code (the threshold models package moved to portfolioAnalytics for more consistent structure of the code base / functionality) 84 | 85 | v0.4.2 (29-01-2019) 86 | ------------------- 87 | 88 | * Feature: converter function in transitionMatrix.utils.converters to convert long form dataframes into canonical float form 89 | * Datasets: synthetic_data9.csv (datetime in string format) 90 | * Training: new data generator in examples/generate_synthetic_data.py to generate long format with string dates 91 | * Training: Additional example (=3) in examples/empirical_transition_matrix.py to process long format with string dates 92 | * Documentation: More detailed explanation of Long Data Formats with links to Open Risk Manual 93 | * Documentation: Enabled sphinx.ext.autosectionlabel for easy internal links / removed duplicate labels 94 | 95 | v0.4.1 (31-10-2018) 96 | ------------------- 97 | 98 | * Feature: Added functionality for conditioning multi-period transition matrices 99 | * Training: Example calculation and visualization of conditional matrices 100 | * Datasets: State space description and CGS mappings for top-6 credit rating agencies 101 | 102 | 103 | v0.4.0 (23-10-2018) 104 | ------------------- 105 | 106 | * Installation: First PyPI and wheel installation options 107 | * Feature: Added Aalen-Johansen Duration Estimator 108 | * Documentation: Major overhaul of documentation, now targeting ReadTheDocs distribution 109 | * Training: Streamlining of all examples 110 | * Datasets: Synthetic Datasets in long format 111 | 112 | v0.3.1 (21-09-2018) 113 | ------------------- 114 | 115 | * Feature: Expanded functionality to compute and visualize credit curves 116 | 117 | v0.3 (27-08-2018) 118 | ------------------- 119 | 120 | * Feature: Addition of portfolio models (formerly portfolio_analytics_library) for data generation and testing 121 | 
* Training: Added examples in jupyter notebook format 122 | 123 | v0.2 (05-06-2018) 124 | ------------------- 125 | 126 | * Feature: Addition of threshold generation algorithms 127 | 128 | v0.1.3 (04-05-2018) 129 | ------------------- 130 | 131 | * Documentation: Sphinx based documentation 132 | * Training: Additional visualization examples 133 | 134 | v0.1.2 (05-12-2017) 135 | ------------------- 136 | 137 | * Refactoring: Dataset paths 138 | * Bugfix: Correcting requirement dependencies (missing matplotlib) 139 | * Documentation: More detailed instructions 140 | 141 | v0.1.1 (03-12-2017) 142 | ------------------- 143 | 144 | * Feature: TransitionMatrix model: new methods to merge States, fix problematic probability matrices, I/O API's 145 | * Feature: TransitionMatrixSet mode: json and csv readers, methods for set-wise manipulations 146 | * Datasets: Additional multiperiod datasets (Standard and Poors historical corporate rating transition rates) 147 | * Feature: Enhanced matrix comparison functionality 148 | * Training: Three additional example workflows 149 | * fixing multiperiod matrices (completing State Space) 150 | * adjusting matrices for withdrawn entries 151 | * generating full multi-period sets from limited observations 152 | 153 | v0.1.0 (11-11-2017) 154 | ------------------- 155 | 156 | * First public release of the package -------------------------------------------------------------------------------- /examples/python/matrix_from_duration_data.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # (c) 2017-2024 Open Risk, all rights reserved 4 | # 5 | # TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included 6 | # in the source distribution of TransitionMatrix. This is notwithstanding any licenses of 7 | # third-party software included in this distribution. You may not use this file except in 8 | # compliance with the License. 
"""
Example workflows using transitionMatrix to estimate a matrix from duration type data
The datasets are produced in examples/generate_synthetic_data.py

"""

import pandas as pd

import transitionMatrix as tm
from transitionMatrix import source_path
from transitionMatrix.estimators import cohort_estimator as es
from transitionMatrix.utils.converters import datetime_to_float

dataset_path = source_path + "datasets/"

# Select the example to run
# 1-> An example with limited data (dataset contains only one entity)
# 2-> A full example with a 2x2 matrix
# 3-> A full example with a larger state space (seven states A-G as declared below)
# 4-> An example with date strings: the state space is derived from the data and
#     the timestamps are converted to floats before binning

example = 1

if example == 1:

    # An example with limited data (dataset contains only one entity)
    data = pd.read_csv(dataset_path + 'synthetic_data1.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    myState = tm.StateSpace([('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")])
    print("> Validate data set")
    print(myState.validate_dataset(dataset=sorted_data))
    # Bin the data into 5 intervals
    cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)
    print("> Cohort intervals: ", cohort_intervals)
    print(80 * '=')
    print("> Cohort data")
    print(cohort_data)
    myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
    labels = {'Time': 'Cohort', 'State': 'State', 'ID': 'ID'}
    print(80 * '=')
    result = myEstimator.fit(cohort_data, labels=labels)
    print(80 * '=')
    print("> Display results")
    myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
    print(myMatrixSet.temporal_type)
    myMatrixSet.print_matrix()


elif example == 2:

    # Step 1
    # Load the data set into a pandas frame
    # Make sure state is read as a string and not as integer
    # Second synthetic data example:
    # n entities with ~10 observations each, [0,1] state, 50%/50% transition matrix
    print("> Step 1: Load the data")
    data = pd.read_csv(dataset_path + 'synthetic_data2.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    print(sorted_data.describe())

    # Step 2
    # Describe and validate the State Space against the data
    print("> Step 2: Validate against state space")
    myState = tm.StateSpace([('0', "Basic"), ('1', "Default")])
    myState.describe()
    print(myState.validate_dataset(dataset=sorted_data))

    # Step 3
    # Arrange the data in period cohorts
    print("> Step 3: Arrange the data in period cohorts")
    cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)

    # Step 4
    # Estimate matrices using method of choice
    # compute confidence interval using goodman method at 95% confidence level
    print("> Step 4: Estimate matrices")
    myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
    # The time column is passed under the 'Time' key, exactly as in the other
    # examples of this script (this example previously used a stale 'Timestamp' key)
    labels = {'Time': 'Cohort', 'State': 'State', 'ID': 'ID'}
    result = myEstimator.fit(cohort_data, labels=labels)

    # Step 5
    # Print out the set of estimated matrices
    print("> Step 5: Display results")
    myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
    print(myMatrixSet.temporal_type)
    myMatrixSet.print_matrix()


elif example == 3:

    data = pd.read_csv(dataset_path + 'synthetic_data3.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    myState = tm.StateSpace([('0', "A"), ('1', "B"), ('2', "C"), ('3', "D"), ('4', "E"), ('5', "F"), ('6', "G")])
    print(myState.validate_dataset(dataset=sorted_data))
    cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)
    myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
    labels = {'Time': 'Cohort', 'State': 'State', 'ID': 'ID'}
    result = myEstimator.fit(cohort_data, labels=labels)
    myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
    myMatrixSet.print_matrix()

elif example == 4:

    data = pd.read_csv(dataset_path + 'synthetic_data10.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    # Here the state space is built from the observed data instead of being declared
    myState = tm.StateSpace(transition_data=sorted_data)
    myState.describe()
    print(myState.validate_dataset(dataset=sorted_data))
    # Convert the timestamps to floats before binning them into cohorts
    [start_date, end_date, total_days], data = datetime_to_float(sorted_data)
    print(data.head())
    cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=6)
    myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
    print(cohort_data.head())
    result = myEstimator.fit(cohort_data, labels={'Time': 'Cohort', 'State': 'State', 'ID': 'ID'})
    # Report the matrix averaged by the estimator rather than the per-cohort set
    myMatrix = tm.TransitionMatrix(myEstimator.average_matrix)
    myMatrix.print_matrix(accuracy=3)


def main():
    print("Done")


if __name__ == "__main__":
    main()
# of which is included
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.

import pprint as pp

import pandas as pd

from transitionMatrix.utils.converters import frame_to_array, datetime_to_float
from transitionMatrix.utils.preprocessing import transitions_summary, validate_absorbing_state

""" Examples of using transitionMatrix to prepare data sets (data cleansing). The functionality is primarily based on pandas, with transition data specific procedures supported by the utils sub-package. For some operations (and large datasets) it might be advisable to work with numpy arrays

"""

# NB: the special states are hardwired for this data set. The labels are the ones
# in force AFTER the NR column has been moved to the end (see reorder_dict below).
DEFAULT_STATE = 7
NR_STATE = 8

COLUMNS = ['ID', 'Time', 'State']


def _is_first_observation(i, entity_id):
    """Return True when row i is the first observation of its entity.

    Row 0 is always a first observation; without the explicit check
    ``entity_id[i - 1]`` would wrap around and compare against the last row.
    """
    return i == 0 or entity_id[i - 1] != entity_id[i]


def _leaves_state(i, entity_id, entity_state, state):
    """Return True when row i is in ``state`` and the same entity's next row is not.

    The last row of the data has no successor and therefore never matches.
    """
    return (i + 1 < len(entity_id)
            and entity_id[i + 1] == entity_id[i]
            and entity_state[i] == state
            and entity_state[i + 1] != state)


def _drop_observations(frame, should_drop):
    """Return a copy of ``frame`` without the rows for which ``should_drop`` is true.

    :param frame: data frame accepted by frame_to_array
    :param should_drop: callable (i, entity_id, entity_state) -> bool
    :returns: a new frame with columns ID, Time, State
    """
    entity_id, event_time, entity_state = frame_to_array(frame)
    rows = [(entity_id[i], event_time[i], entity_state[i])
            for i in range(len(entity_id))
            if not should_drop(i, entity_id, entity_state)]
    return pd.DataFrame(rows, columns=COLUMNS)


# Load the raw data into a pandas frame
raw_data = pd.read_csv('../../datasets/rating_data_raw.csv')

# Print a generic summary based on pandas describe() method
print(raw_data.describe())

# Bring the column names to a standard convention
raw_data.rename(columns={"RatingNum": "State", "Date": "Time", "CustomerId": "ID"}, inplace=True)

print(raw_data.head())

# Print a summary of transition statistics
pp.pprint(transitions_summary(raw_data))

# Drop redundant column
raw_data = raw_data.drop(columns=['Rating'])

# Move the NR column to the end
reorder_dict = {
    0: 8,
    1: 0,
    2: 1,
    3: 2,
    4: 3,
    5: 4,
    6: 5,
    7: 6,
    8: 7
}
raw_data = raw_data.replace({"State": reorder_dict})

print(raw_data.head(10))

# Convert date strings to floats
[start_date, end_date, total_days], converted_data = datetime_to_float(raw_data, time_column='Time')
print([start_date, end_date, total_days])

# remove an initial observation for an entity if it is classified as D
# Reason: an initial defaulted observation is unusual / non-sensical
clean_data0 = _drop_observations(
    converted_data,
    lambda i, ids, states: _is_first_observation(i, ids) and states[i] == DEFAULT_STATE)

# remove an initial observation for an entity if it is classified as NR
# Reason: left truncation of observations must be handled consistently
clean_data1 = _drop_observations(
    clean_data0,
    lambda i, ids, states: _is_first_observation(i, ids) and states[i] == NR_STATE)

# remove an intermediate observation for an entity if it is classified as NR
# Reason: it is non-informative and it complicates the handling of NR state (non-absorbing)
# (the final row of the data is always kept: it has no following observation)
clean_data2 = _drop_observations(
    clean_data1,
    lambda i, ids, states: _leaves_state(i, ids, states, NR_STATE))

# remove an intermediate observation for an entity if it is classified as D
# Reason: this is (presumably) a 're-emergence from default' type event.
# It complicates the handling of D state (non-absorbing)
clean_data3 = _drop_observations(
    clean_data2,
    lambda i, ids, states: _leaves_state(i, ids, states, DEFAULT_STATE))

# remove NR observations of defaulted entities
# Reason: non-informative, ensure D is truly an absorbing state
# (NB: the labels 7, 8 are hardwired for this data set)
# Only a D -> NR pair belonging to the same entity qualifies
clean_data4 = _drop_observations(
    clean_data3,
    lambda i, ids, states: (i > 0
                            and ids[i - 1] == ids[i]
                            and states[i] == NR_STATE
                            and states[i - 1] == DEFAULT_STATE))

# check that D and NR are absorbing states
print(validate_absorbing_state(clean_data4, DEFAULT_STATE))
print(validate_absorbing_state(clean_data4, NR_STATE))

pp.pprint(transitions_summary(clean_data4))

# if the first entry is not at the earliest global observation timepoint, add the initial observation
# this assumption removes left truncation condition but may bias the data
# NB: 0.0 is hardwired as left observation window
rows = []
entity_id, event_time, entity_state = frame_to_array(clean_data4)
for i in range(len(entity_id)):
    rows.append((entity_id[i], event_time[i], entity_state[i]))
    if _is_first_observation(i, entity_id) and event_time[i] > 0:
        # synthetic observation at the start of the window, in the first observed state
        rows.append((entity_id[i], 0.0, entity_state[i]))

clean_data = pd.DataFrame(rows, columns=COLUMNS)

# Sort by entity ID, then event Time
sorted_data = clean_data.sort_values(['ID', 'Time'], ascending=[True, True])

pp.pprint(transitions_summary(sorted_data))
sorted_data.to_csv('../../datasets/rating_data.csv', index=False)
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.


"""
Example workflows using transitionMatrix to estimate a transition matrix from data in cohort format

"""

import pprint as pp

import pandas as pd

import transitionMatrix as tm
from transitionMatrix import source_path
from transitionMatrix.creditratings.creditsystems import Generic_SS
from transitionMatrix.estimators import cohort_estimator as es
from transitionMatrix.utils.preprocessing import transitions_summary, unique_timestamps

dataset_path = source_path + "datasets/"

# Select the example to run (the numbering matches the branches below)
# 1-> The Simplest Absorbing Case (for validation)
# 2-> An IFRS 9 Style 3x3 Migration Matrix
# 3-> S&P Style Credit Rating Migration Matrix

example = 3

if example == 3:
    # Example 3: S&P Style Credit Rating Migration Matrix

    # S&P Ratings State Space
    # definition = [('0', "AAA"), ('1', "AA"), ('2', "A"), ('3', "BBB"),
    #               ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]

    myState = Generic_SS

    print("> Describe state space")
    myState.describe()
    print("> List of states")
    print(80 * '-')
    print(myState.get_states())
    print("> List of state labels")
    print(80 * '-')
    print(myState.get_state_labels())

    print("> Load Dataset")
    data = pd.read_csv(dataset_path + 'synthetic_data4.csv', dtype={'State': str})

    print("> Transitions Summary")
    print(80 * '-')
    pp.pprint(transitions_summary(data))

    print("> Sort and Validate dataset")
    print(80 * '-')
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    print(myState.validate_dataset(dataset=sorted_data))

    # the cohort boundaries are taken from the distinct timestamps of the data;
    # confidence intervals use the goodman method at 95% confidence level
    print("> Cohort Estimator")
    print(80 * '-')
    cohort_bounds = unique_timestamps(sorted_data)
    myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
                                     ci={'method': 'goodman', 'alpha': 0.05})
    result = myEstimator.fit(sorted_data)

    # Print confidence intervals
    print("> Compute confidence interval using goodman method at 95% confidence level")
    myEstimator.summary()

    # Print the estimated results
    myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
    print("> Print Estimated Matrix Set")
    myMatrixSet.print_matrix()

elif example == 2:
    # Example 2: IFRS 9 Style Migration Matrix
    # Format: discrete time grid (already arranged in cohorts)

    # Step 1
    # Load the data set into a pandas frame
    # Make sure state is read as a string and not as integer
    # Fifth synthetic data example: IFRS 9 Migration Matrix
    print(">>> Step 1: Data Loading")
    data = pd.read_csv(dataset_path + 'synthetic_data5.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    # Data is a pandas frame, all methods are available
    print(sorted_data.describe())

    # Step 2
    # Describe and validate the State Space against the data
    # We create a mock IFRS 9 state space (three stage assets)
    print(">>> Step 2: Diagnostics")
    definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
    myState = tm.StateSpace(definition)
    myState.describe()
    print(myState.validate_dataset(dataset=sorted_data))

    # Step 3
    # Estimate matrices using method of choice
    # compute confidence interval using goodman method at 95% confidence level
    print(">>> Step 3: Estimation")
    cohort_bounds = unique_timestamps(sorted_data)
    myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
                                     ci={'method': 'goodman', 'alpha': 0.05})
    result = myEstimator.fit(sorted_data)
    myEstimator.summary()

    # Step 4
    # Matrix averaged over the cohorts, as exposed by the estimator
    print(">>> Step 4: Average Matrix")
    print(myEstimator.average_matrix)

    # Step 5
    # Review full set of numerical results
    print(">>> Step 5")
    myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
    print(myMatrixSet.temporal_type)
    myMatrixSet.print_matrix()

elif example == 1:
    # Example 1: Simplest Absorbing Case for validation
    data = pd.read_csv(dataset_path + 'synthetic_data6.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    myState = tm.StateSpace()
    myState.generic(2)
    print(80 * '-')
    print('State Space Validation:')
    print(myState.validate_dataset(dataset=sorted_data))
    cohort_bounds = unique_timestamps(sorted_data)
    myEstimator = es.CohortEstimator(states=myState, cohort_bounds=cohort_bounds,
                                     ci={'method': 'goodman', 'alpha': 0.05})
    result = myEstimator.fit(sorted_data)
    myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
    print(80 * '-')
    print('Sample Estimated Matrix (Count Format, All Cohorts):')
    myEstimator.print(select='Counts')
    print(80 * '-')
    print('Sample Estimated Matrix (Frequency Format, Period 3):')
    myEstimator.print(select='Frequencies', period=3)


def main():
    """Entry point when run as a script; the selected example has already run at import time."""
    print("Done")


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- 1 | Input Data Formats 2 | =================== 3 | 4 | The transitionMatrix package supports a variety of input data formats for empirical (observation) data. Two key ones are described here in more detail. More background about data formats is available at the `Open Risk Manual Risk Data Category `_ 5 | 6 | 7 | Long Data Format 8 | ------------------------------------------- 9 | 10 | Long Data Format is a tabular representation of time series data that records the states (measurements) of multiple entities. Its defining characteristic is that each table row contains data pertaining to one entity at one point in time. 11 | 12 | Canonical Form of Long Data 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | The Long Data Format (also Narrow or Stacked) consists of Tuples, e.g. (Entity ID, Time, From State, To State) indicating the time T at which an entity with ID migrated from the (From State) -> to the (To State). 16 | 17 | The *canonical form* used as input to duration based estimators uses normalized timestamps (from 0 to T_max, where T_max is the last timepoint) and looks as follows: 18 | 19 | +----+------+------+----+ 20 | | ID | Time | From | To | 21 | +----+------+------+----+ 22 | | 1 | 1.1 | 0 | 1 | 23 | +----+------+------+----+ 24 | | 1 | 2.0 | 1 | 2 | 25 | +----+------+------+----+ 26 | | 1 | 3.4 | 2 | 3 | 27 | +----+------+------+----+ 28 | | 1 | 4.0 | 3 | 2 | 29 | +----+------+------+----+ 30 | | 2 | 1.2 | 0 | 1 | 31 | +----+------+------+----+ 32 | | 2 | 2.4 | 1 | 2 | 33 | +----+------+------+----+ 34 | | 2 | 3.5 | 2 | 3 | 35 | +----+------+------+----+ 36 | 37 | The canonical form has the advantage of being unambiguous about the context where the transition occurs. The meaning of each row of data stands on its own and does not rely on the order (or even the presence) of other records. This facilitates, for example, the algorithmic processing of the data. 
On the flipside, the format is less efficient in terms of storage (the state information occurs twice) compared to the compact format (See below). 38 | 39 | The canonical format requires that the final state of all entities at the end of the observation window (Time F) is included (otherwise we have no indication about when the measurements stopped). Alternatively such information is provided as separate metadata (or implicitly, for example if measurements are understood to span a number of full annual periods). 40 | 41 | .. note:: 42 | 43 | Synthetic_data(7, 8, 9) in the :ref:`Datasets` collection are examples of data in long format and canonical form 44 | 45 | String Dates 46 | ~~~~~~~~~~~~~~~~ 47 | 48 | It is frequent that transition data (e.g. from financial applications) have timestamps in the form of a *date string*. For example: 49 | 50 | +----+-------------+------+----+ 51 | | ID | Date String | From | To | 52 | +----+-------------+------+----+ 53 | | 1 | 10-10-2010 | 0 | 1 | 54 | +----+-------------+------+----+ 55 | | 1 | 10-11-2010 | 1 | 2 | 56 | +----+-------------+------+----+ 57 | 58 | String dates must be converted to a numerical representation before we can work with the transition data. The :func:`transitionMatrix.utils.converters.datetime_to_float` function of the :mod:`transitionMatrix.utils` subpackage can be used to convert data into the canonical form. 59 | 60 | .. note:: 61 | 62 | Synthetic_data9 and rating_data in the :ref:`Datasets` collection have observation times in string data form. 63 | 64 | 65 | Compact Form of Long Format 66 | ------------------------------------------- 67 | 68 | The format uses triples (ID, Time, State), indicating the time T at which an entity ID **Left** its previous state S (the state it migrates to is encoded in the next observation of the same entity). 
The convention can obviously be reversed to indicate the time of entering a new state (in which case we need some information to bound the start of the observation window). 69 | 70 | The compact long format avoids the duplication of data of the canonical approach but requires the presence of other records to infer the realised sequence of events. 71 | 72 | The format also requires that the final state of all entities at the end of the observation window (Time F) is included as the last record (otherwise we have no indication about when the measurements stopped). Alternatively such information is provided separately (or implicitly, e.g. if measurements are understood to span a number of full annual periods). 73 | 74 | 75 | +----+--------+-------+ 76 | | ID | Time | State | 77 | +----+--------+-------+ 78 | | 1 | 1.1 | 0 | 79 | +----+--------+-------+ 80 | | 1 | 2.0 | 1 | 81 | +----+--------+-------+ 82 | | 1 | 3.4 | 2 | 83 | +----+--------+-------+ 84 | | 1 | 4.0 | 3 | 85 | +----+--------+-------+ 86 | | 1 | F | 2 | 87 | +----+--------+-------+ 88 | | 2 | 1.2 | 0 | 89 | +----+--------+-------+ 90 | | 2 | 2.4 | 1 | 91 | +----+--------+-------+ 92 | | 2 | 3.5 | 2 | 93 | +----+--------+-------+ 94 | | 2 | F | 3 | 95 | +----+--------+-------+ 96 | 97 | Wide Data Format 98 | ------------------ 99 | 100 | Wide Data Format is an alternative tabular representation of time series data that records the states (measurements) of multiple entities. Its defining characteristic is that each table row contains *all the data* pertaining to any one entity. 
The measurement times are not arbitrary but encoded in the column labels: 101 | 102 | +----+--------+-------+-------+ 103 | | ID | 2011 | 2012 | 2013 | 104 | +----+--------+-------+-------+ 105 | | A1 | 1 | 0 | 1 | 106 | +----+--------+-------+-------+ 107 | | A2 | 2 | 1 | 3 | 108 | +----+--------+-------+-------+ 109 | | A3 | 0 | 1 | 2 | 110 | +----+--------+-------+-------+ 111 | 112 | Conversion from wide to long formats can be handled using the `pandas wide_to_long method 113 | `_. 114 | 115 | 116 | (This method will be more integrated in the future) 117 | 118 | 119 | Other Formats 120 | ------------------------------------------- 121 | 122 | As mentioned, a design choice is that data ingestion of transitionMatrix is via a pandas dataframe so other formats can be handled with additional code by the user. If there is a format that you repeatedly encounter submit an issue with your desired format / transformation `suggestion `_. -------------------------------------------------------------------------------- /datasets/scenario_data.csv: -------------------------------------------------------------------------------- 1 | ID,Time,State 2 | 0,0,3 3 | 1,0,3 4 | 2,0,3 5 | 3,0,3 6 | 4,0,3 7 | 5,0,3 8 | 6,0,3 9 | 7,0,3 10 | 8,0,3 11 | 9,0,3 12 | 10,0,3 13 | 11,0,3 14 | 12,0,3 15 | 13,0,3 16 | 14,0,3 17 | 15,0,3 18 | 16,0,3 19 | 17,0,3 20 | 18,0,3 21 | 19,0,3 22 | 20,0,3 23 | 21,0,3 24 | 22,0,3 25 | 23,0,3 26 | 24,0,3 27 | 25,0,3 28 | 26,0,3 29 | 27,0,3 30 | 28,0,3 31 | 29,0,3 32 | 30,0,3 33 | 31,0,3 34 | 32,0,3 35 | 33,0,3 36 | 34,0,3 37 | 35,0,3 38 | 36,0,3 39 | 37,0,3 40 | 38,0,3 41 | 39,0,3 42 | 40,0,3 43 | 41,0,3 44 | 42,0,3 45 | 43,0,3 46 | 44,0,3 47 | 45,0,3 48 | 46,0,3 49 | 47,0,3 50 | 48,0,3 51 | 49,0,3 52 | 0,1,4 53 | 1,1,4 54 | 2,1,3 55 | 3,1,4 56 | 4,1,3 57 | 5,1,3 58 | 6,1,3 59 | 7,1,3 60 | 8,1,3 61 | 9,1,3 62 | 10,1,3 63 | 11,1,3 64 | 12,1,5 65 | 13,1,4 66 | 14,1,3 67 | 15,1,3 68 | 16,1,7 69 | 17,1,3 70 | 18,1,3 71 | 19,1,3 72 | 20,1,4 73 | 21,1,3 74 | 
22,1,5 75 | 23,1,3 76 | 24,1,3 77 | 25,1,4 78 | 26,1,4 79 | 27,1,3 80 | 28,1,3 81 | 29,1,3 82 | 30,1,4 83 | 31,1,3 84 | 32,1,3 85 | 33,1,3 86 | 34,1,3 87 | 35,1,4 88 | 36,1,3 89 | 37,1,4 90 | 38,1,3 91 | 39,1,3 92 | 40,1,4 93 | 41,1,3 94 | 42,1,3 95 | 43,1,4 96 | 44,1,4 97 | 45,1,4 98 | 46,1,3 99 | 47,1,3 100 | 48,1,3 101 | 49,1,3 102 | 0,2,4 103 | 1,2,4 104 | 2,2,3 105 | 3,2,5 106 | 4,2,3 107 | 5,2,3 108 | 6,2,3 109 | 7,2,3 110 | 8,2,4 111 | 9,2,3 112 | 10,2,3 113 | 11,2,3 114 | 12,2,4 115 | 13,2,4 116 | 14,2,4 117 | 15,2,3 118 | 16,2,7 119 | 17,2,3 120 | 18,2,3 121 | 19,2,3 122 | 20,2,4 123 | 21,2,3 124 | 22,2,4 125 | 23,2,3 126 | 24,2,4 127 | 25,2,5 128 | 26,2,5 129 | 27,2,3 130 | 28,2,3 131 | 29,2,3 132 | 30,2,5 133 | 31,2,3 134 | 32,2,3 135 | 33,2,3 136 | 34,2,3 137 | 35,2,4 138 | 36,2,4 139 | 37,2,4 140 | 38,2,3 141 | 39,2,3 142 | 40,2,4 143 | 41,2,3 144 | 42,2,4 145 | 43,2,5 146 | 44,2,4 147 | 45,2,4 148 | 46,2,3 149 | 47,2,3 150 | 48,2,4 151 | 49,2,4 152 | 0,3,4 153 | 1,3,4 154 | 2,3,3 155 | 3,3,5 156 | 4,3,4 157 | 5,3,3 158 | 6,3,3 159 | 7,3,3 160 | 8,3,5 161 | 9,3,3 162 | 10,3,3 163 | 11,3,3 164 | 12,3,5 165 | 13,3,4 166 | 14,3,4 167 | 15,3,3 168 | 16,3,7 169 | 17,3,3 170 | 18,3,3 171 | 19,3,4 172 | 20,3,5 173 | 21,3,4 174 | 22,3,4 175 | 23,3,4 176 | 24,3,4 177 | 25,3,5 178 | 26,3,5 179 | 27,3,3 180 | 28,3,3 181 | 29,3,3 182 | 30,3,5 183 | 31,3,4 184 | 32,3,3 185 | 33,3,3 186 | 34,3,4 187 | 35,3,4 188 | 36,3,5 189 | 37,3,4 190 | 38,3,3 191 | 39,3,3 192 | 40,3,5 193 | 41,3,3 194 | 42,3,4 195 | 43,3,6 196 | 44,3,4 197 | 45,3,5 198 | 46,3,3 199 | 47,3,3 200 | 48,3,4 201 | 49,3,5 202 | 0,4,4 203 | 1,4,4 204 | 2,4,3 205 | 3,4,5 206 | 4,4,4 207 | 5,4,3 208 | 6,4,3 209 | 7,4,4 210 | 8,4,5 211 | 9,4,4 212 | 10,4,3 213 | 11,4,3 214 | 12,4,5 215 | 13,4,4 216 | 14,4,4 217 | 15,4,3 218 | 16,4,7 219 | 17,4,4 220 | 18,4,4 221 | 19,4,4 222 | 20,4,6 223 | 21,4,4 224 | 22,4,5 225 | 23,4,4 226 | 24,4,4 227 | 25,4,5 228 | 26,4,4 229 | 27,4,3 230 | 28,4,3 231 | 29,4,3 232 | 
30,4,7 233 | 31,4,4 234 | 32,4,4 235 | 33,4,3 236 | 34,4,4 237 | 35,4,4 238 | 36,4,6 239 | 37,4,4 240 | 38,4,4 241 | 39,4,3 242 | 40,4,5 243 | 41,4,3 244 | 42,4,4 245 | 43,4,7 246 | 44,4,4 247 | 45,4,5 248 | 46,4,3 249 | 47,4,3 250 | 48,4,5 251 | 49,4,5 252 | 0,5,5 253 | 1,5,4 254 | 2,5,3 255 | 3,5,6 256 | 4,5,4 257 | 5,5,3 258 | 6,5,4 259 | 7,5,4 260 | 8,5,5 261 | 9,5,4 262 | 10,5,3 263 | 11,5,4 264 | 12,5,5 265 | 13,5,4 266 | 14,5,4 267 | 15,5,4 268 | 16,5,7 269 | 17,5,4 270 | 18,5,4 271 | 19,5,5 272 | 20,5,4 273 | 21,5,4 274 | 22,5,5 275 | 23,5,4 276 | 24,5,4 277 | 25,5,5 278 | 26,5,5 279 | 27,5,3 280 | 28,5,3 281 | 29,5,4 282 | 30,5,7 283 | 31,5,4 284 | 32,5,4 285 | 33,5,3 286 | 34,5,4 287 | 35,5,4 288 | 36,5,6 289 | 37,5,5 290 | 38,5,4 291 | 39,5,3 292 | 40,5,5 293 | 41,5,3 294 | 42,5,4 295 | 43,5,7 296 | 44,5,4 297 | 45,5,6 298 | 46,5,3 299 | 47,5,3 300 | 48,5,5 301 | 49,5,5 302 | 0,6,5 303 | 1,6,4 304 | 2,6,3 305 | 3,6,7 306 | 4,6,5 307 | 5,6,3 308 | 6,6,4 309 | 7,6,4 310 | 8,6,5 311 | 9,6,4 312 | 10,6,3 313 | 11,6,4 314 | 12,6,7 315 | 13,6,4 316 | 14,6,4 317 | 15,6,4 318 | 16,6,7 319 | 17,6,4 320 | 18,6,4 321 | 19,6,5 322 | 20,6,5 323 | 21,6,4 324 | 22,6,4 325 | 23,6,4 326 | 24,6,4 327 | 25,6,6 328 | 26,6,5 329 | 27,6,3 330 | 28,6,3 331 | 29,6,4 332 | 30,6,7 333 | 31,6,4 334 | 32,6,4 335 | 33,6,3 336 | 34,6,4 337 | 35,6,4 338 | 36,6,7 339 | 37,6,5 340 | 38,6,4 341 | 39,6,4 342 | 40,6,4 343 | 41,6,3 344 | 42,6,4 345 | 43,6,7 346 | 44,6,5 347 | 45,6,5 348 | 46,6,3 349 | 47,6,3 350 | 48,6,4 351 | 49,6,5 352 | 0,7,4 353 | 1,7,4 354 | 2,7,4 355 | 3,7,7 356 | 4,7,4 357 | 5,7,3 358 | 6,7,4 359 | 7,7,4 360 | 8,7,5 361 | 9,7,4 362 | 10,7,3 363 | 11,7,4 364 | 12,7,7 365 | 13,7,4 366 | 14,7,4 367 | 15,7,4 368 | 16,7,7 369 | 17,7,4 370 | 18,7,4 371 | 19,7,5 372 | 20,7,6 373 | 21,7,4 374 | 22,7,4 375 | 23,7,5 376 | 24,7,4 377 | 25,7,6 378 | 26,7,5 379 | 27,7,3 380 | 28,7,3 381 | 29,7,4 382 | 30,7,7 383 | 31,7,4 384 | 32,7,4 385 | 33,7,3 386 | 34,7,4 387 | 35,7,4 388 | 
36,7,7 389 | 37,7,5 390 | 38,7,4 391 | 39,7,4 392 | 40,7,4 393 | 41,7,3 394 | 42,7,4 395 | 43,7,7 396 | 44,7,4 397 | 45,7,5 398 | 46,7,3 399 | 47,7,3 400 | 48,7,7 401 | 49,7,5 402 | 0,8,4 403 | 1,8,4 404 | 2,8,4 405 | 3,8,7 406 | 4,8,5 407 | 5,8,3 408 | 6,8,4 409 | 7,8,4 410 | 8,8,7 411 | 9,8,4 412 | 10,8,3 413 | 11,8,4 414 | 12,8,7 415 | 13,8,5 416 | 14,8,4 417 | 15,8,4 418 | 16,8,7 419 | 17,8,4 420 | 18,8,4 421 | 19,8,5 422 | 20,8,7 423 | 21,8,5 424 | 22,8,4 425 | 23,8,5 426 | 24,8,4 427 | 25,8,6 428 | 26,8,5 429 | 27,8,3 430 | 28,8,3 431 | 29,8,4 432 | 30,8,7 433 | 31,8,5 434 | 32,8,4 435 | 33,8,4 436 | 34,8,5 437 | 35,8,4 438 | 36,8,7 439 | 37,8,5 440 | 38,8,4 441 | 39,8,4 442 | 40,8,4 443 | 41,8,3 444 | 42,8,4 445 | 43,8,7 446 | 44,8,5 447 | 45,8,5 448 | 46,8,3 449 | 47,8,3 450 | 48,8,7 451 | 49,8,5 452 | 0,9,4 453 | 1,9,4 454 | 2,9,4 455 | 3,9,7 456 | 4,9,5 457 | 5,9,3 458 | 6,9,4 459 | 7,9,4 460 | 8,9,7 461 | 9,9,4 462 | 10,9,3 463 | 11,9,4 464 | 12,9,7 465 | 13,9,5 466 | 14,9,4 467 | 15,9,5 468 | 16,9,7 469 | 17,9,4 470 | 18,9,4 471 | 19,9,6 472 | 20,9,7 473 | 21,9,5 474 | 22,9,4 475 | 23,9,5 476 | 24,9,4 477 | 25,9,5 478 | 26,9,5 479 | 27,9,3 480 | 28,9,3 481 | 29,9,4 482 | 30,9,7 483 | 31,9,4 484 | 32,9,5 485 | 33,9,4 486 | 34,9,6 487 | 35,9,4 488 | 36,9,7 489 | 37,9,6 490 | 38,9,4 491 | 39,9,5 492 | 40,9,4 493 | 41,9,3 494 | 42,9,4 495 | 43,9,7 496 | 44,9,4 497 | 45,9,5 498 | 46,9,3 499 | 47,9,3 500 | 48,9,7 501 | 49,9,5 502 | 0,10,5 503 | 1,10,4 504 | 2,10,4 505 | 3,10,7 506 | 4,10,7 507 | 5,10,3 508 | 6,10,4 509 | 7,10,4 510 | 8,10,7 511 | 9,10,4 512 | 10,10,3 513 | 11,10,4 514 | 12,10,7 515 | 13,10,7 516 | 14,10,4 517 | 15,10,5 518 | 16,10,7 519 | 17,10,4 520 | 18,10,4 521 | 19,10,6 522 | 20,10,7 523 | 21,10,5 524 | 22,10,4 525 | 23,10,5 526 | 24,10,4 527 | 25,10,5 528 | 26,10,5 529 | 27,10,4 530 | 28,10,4 531 | 29,10,4 532 | 30,10,7 533 | 31,10,4 534 | 32,10,4 535 | 33,10,4 536 | 34,10,5 537 | 35,10,4 538 | 36,10,7 539 | 37,10,7 540 | 38,10,4 541 | 
# encoding: utf-8

# (c) 2017-2024 Open Risk (https://www.openriskmanagement.com)
#
# TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.

""" This module provides objects related to credit curves

* CreditCurve_ implements the functionality of a collection of credit (default curves)

"""

import numpy as np
import pandas as pd


class CreditCurve(np.matrix):
    """ The _`CreditCurve` object implements a typical collection of credit curves.
    The class inherits from numpy matrices and implements additional properties specific to curves.

    Each row is one curve, each column is one period.

    """

    def __new__(cls, values=None, json_file=None, csv_file=None):
        """ Create a new credit curve set. Different options for initialization are:

        * providing values as a list of list
        * providing values as a numpy array (The rows are the different curves, the columns are different periods)
        * loading from a csv file
        * loading from a json file

        The sources are tried in the order values, json_file, csv_file and the first
        one that is not None is used.

        :param values: initialization values
        :param json_file: a json file containing credit curve data
        :param csv_file: a csv file containing credit curve data
        :type values: list of lists or numpy array
        :returns: returns a CreditCurve object
        :rtype: object
        :raises ValueError: if none of values, json_file, csv_file is provided

        .. note:: The initialization in itself does not validate if the provided values form indeed a credit curve

        :Example:

        .. code-block:: python

            A = tm.CreditCurve(values=[[0.1, 0.2, 0.3], [0.2, 0.6, 0.8], [0.01, 0.02, 0.06]])

        """
        if values is not None:
            # Initialize with given values
            data = values
        elif json_file is not None:
            # Initialize from file in json format
            data = pd.read_json(json_file).values
        elif csv_file is not None:
            # Initialize from file in csv format
            data = pd.read_csv(csv_file, index_col=None).values
        else:
            # There is no meaningful default curve set; fail with a clear message
            # instead of an AttributeError on None further down
            raise ValueError("CreditCurve requires one of: values, json_file or csv_file")

        obj = np.asarray(data).view(cls)
        # validation flag is set to False at initialization
        obj.validated = False
        # number of curves in the set (number of rows)
        obj.dimension = obj.shape[0]
        return obj

    def to_json(self, file):
        """
        Write credit curves to file in json format

        :param file: json filename
        """

        pd.DataFrame(np.asarray(self)).to_json(file, orient='values')

    def to_csv(self, file):
        """
        Write credit curves to file in csv format

        :param file: csv filename
        """

        pd.DataFrame(np.asarray(self)).to_csv(file, index=False)

    def to_html(self, file=None):
        """
        Render the credit curves as an html table

        :param file: optional filename; when given the table is also written to that file
        :returns: the html table as a string
        """
        html_table = pd.DataFrame(np.asarray(self)).to_html()
        if file is not None:
            # the context manager closes the handle even if the write fails
            with open(file, 'w', encoding='utf-8') as handle:
                handle.write(html_table)
        return html_table

    def validate(self, accuracy=1e-3):
        """ Validate required properties of a credit curve set. The following are checked

        1. check that all values are probabilities (between 0 and 1)
        2. check that values are non-decreasing

        As a side effect the ``validated`` attribute is updated.

        :param accuracy: accuracy level (kept for interface compatibility; the checks
            below are exact comparisons and do not currently use it)
        :type accuracy: float

        :returns: True if no issue is found, otherwise a list of tuples with validation messages
        """
        validation_messages = []

        curve_set = np.asarray(self)
        curve_set_size, curve_set_periods = curve_set.shape

        # checking that values of curve_set are within allowed range
        for i in range(curve_set_size):
            for j in range(curve_set_periods):
                value = curve_set[i, j]
                if value < 0:
                    validation_messages.append(("Negative Probabilities: ", (i, j, value)))
                if value > 1:
                    validation_messages.append(("Probabilities Larger than 1: ", (i, j, value)))
        # checking monotonicity (each period must not be below the previous one)
        for i in range(curve_set_size):
            for j in range(1, curve_set_periods):
                if curve_set[i, j] < curve_set[i, j - 1]:
                    validation_messages.append(("Curve not monotonic: ", (i, j)))

        self.validated = not validation_messages
        if self.validated:
            return True
        return validation_messages

    def hazard_curve(self):
        """ Compute hazard rates

        .. Todo:: Compute hazard rates (not implemented yet, currently returns None)

        :return: TODO

        """
        pass

    def characterize(self):
        """ Analyse or classify a credit curve according to its properties

        * slope of hazard rate

        .. Todo:: Further characterization (not implemented yet, currently returns None)

        """

        pass

    def print_curve(self, format_type='Standard', accuracy=2):
        """ Pretty print a set of credit curves, one curve per line

        :param format_type: formatting options (Standard, Percent). Any other value
            prints an empty line per curve.
        :type format_type: str
        :param accuracy: number of decimals to display (applies to both formats)
        :type accuracy: int

        """
        number_format = "{0:." + str(accuracy) + "f}"

        if format_type == 'Standard':
            def render(value):
                return number_format.format(value)
        elif format_type == 'Percent':
            def render(value):
                return number_format.format(100 * value) + "%"
        else:
            render = None

        for s_in in range(self.shape[0]):
            if render is None:
                row = ''
            else:
                # every entry is followed by a single blank, including the last one
                row = ''.join(render(self[s_in, s_out]) + ' ' for s_out in range(self.shape[1]))
            print(row)
        print('')