├── tests
│   ├── __init__.py
│   ├── __main__.py
│   ├── conftest.py
│   ├── utils
│   │   ├── test_btree.py
│   │   └── test_concordance.py
│   └── test_generate_datasets.py
├── docs
│   ├── docs_requirements.txt
│   ├── _static
│   │   └── custom.css
│   ├── images
│   │   ├── badfit.png
│   │   ├── goodfit.png
│   │   ├── qq_plot.png
│   │   ├── flat_plot.png
│   │   ├── kmf_mcas.png
│   │   ├── add_at_risk.png
│   │   ├── ci_show_plot.png
│   │   ├── coxph_plot.png
│   │   ├── normal_plot.png
│   │   ├── invert_y_axis.png
│   │   ├── lcd_parametric.png
│   │   ├── lifetimes_mcas.png
│   │   ├── lls_democracy.png
│   │   ├── quickstart_aaf.png
│   │   ├── quickstart_kmf.png
│   │   ├── lls_regime_type.png
│   │   ├── quickstart_multi.png
│   │   ├── show_censors_plot.png
│   │   ├── survival_weibull.png
│   │   ├── lifelines_intro_lcd.png
│   │   ├── quickstart_kmf_cdf.png
│   │   ├── weibull_aft_forest.png
│   │   ├── coxph_plot_quickstart.png
│   │   ├── quickstart_predict_aaf.png
│   │   ├── single_at_risk_plots.png
│   │   ├── waft_plot_quickstart.png
│   │   ├── weibull_aft_two_models.png
│   │   ├── weibull_extrapolation.png
│   │   ├── survival_regression_aaf.png
│   │   ├── coxph_plot_covarite_groups.png
│   │   ├── lifelines_intro_all_regimes.png
│   │   ├── lifelines_intro_kmf_curve.png
│   │   ├── lifelines_intro_kmf_fitter.png
│   │   ├── lifelines_intro_naf_fitter.png
│   │   ├── survival_regression_harper.png
│   │   ├── waltons_cumulative_hazard.png
│   │   ├── waltons_survival_function.png
│   │   ├── lifelines_intro_multi_kmf_fitter.png
│   │   ├── lifelines_intro_naf_fitter_multi.png
│   │   ├── lifelines_intro_naf_smooth_multi.png
│   │   ├── survival_regression_conditioning.png
│   │   ├── lifelines_intro_multi_kmf_fitter_2.png
│   │   ├── lifelines_intro_naf_smooth_multi_2.png
│   │   ├── survival_analysis_intro_censoring.png
│   │   ├── weibull_aft_two_models_side_by_side.png
│   │   ├── survival_analysis_intro_censoring_revealed.png
│   │   └── survival_regression_conditioning_with_median.png
│   ├── lifelines.utils.rst
│   ├── lifelines.datasets.rst
│   ├── lifelines.plotting.rst
│   ├── lifelines.statistics.rst
│   ├── References.rst
│   ├── index.rst
│   ├── lifelines.fitters.rst
│   ├── Makefile
│   ├── Survival Analysis intro.rst
│   └── conf.py
├── .coveragerc
├── reqs
│   ├── travis-requirements.txt
│   ├── base-requirements.txt
│   ├── docs-requirements.txt
│   └── dev-requirements.txt
├── lifelines
│   ├── version.py
│   ├── datasets
│   │   ├── static_test.csv
│   │   ├── holly_molly_polly.tsv
│   │   ├── psychiatric_patients.csv
│   │   ├── panel_test.csv
│   │   ├── gehan.dat
│   │   ├── g3.csv
│   │   ├── anderson.csv
│   │   ├── lymphoma.csv
│   │   ├── larynx.csv
│   │   ├── multicenter_aids_cohort.tsv
│   │   ├── CuZn-LeftCensoredDataset.csv
│   │   ├── dfcv_dataset.py
│   │   ├── waltons_dataset.csv
│   │   ├── nh4.csv
│   │   ├── regression.csv
│   │   ├── stanford_heart.csv
│   │   ├── lung.csv
│   │   └── rossi.csv
│   ├── utils
│   │   ├── logsf.py
│   │   ├── lowess.py
│   │   ├── gamma.py
│   │   └── btree.py
│   ├── __init__.py
│   └── fitters
│       ├── exponential_fitter.py
│       ├── log_normal_fitter.py
│       ├── log_logistic_fitter.py
│       ├── weibull_fitter.py
│       ├── breslow_fleming_harrington_fitter.py
│       ├── piecewise_exponential_fitter.py
│       ├── weibull_aft_fitter.py
│       ├── log_logistic_aft_fitter.py
│       └── log_normal_aft_fitter.py
├── MANIFEST.in
├── .gitignore
├── examples
│   └── README.md
├── .pre-commit-config.yaml
├── perf_tests
│   ├── aaf_perf_test.py
│   ├── cp_perf_test.py
│   ├── lognormal_perf_test.py
│   ├── ctv_perf_test.py
│   ├── weibull_aft_perf.py
│   ├── weibull_perf_test.py
│   └── batch_vs_single.py
├── Makefile
├── .travis.yml
├── experiments
│   ├── aalen_and_cook_simulation.py
│   ├── detection_limits.py
│   └── left_censoring_experiments.py
├── .prospector.yaml
├── LICENSE
├── setup.py
├── CONTRIBUTING.md
├── README.md
└── paper
    ├── paper.md
    └── paper.bib
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
-------------------------------------------------------------------------------- /docs/docs_requirements.txt: -------------------------------------------------------------------------------- 1 | -r ../reqs/docs-requirements.txt -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: 900px !important; 3 | } 4 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | omit = 4 | lifelines/plotting.py 5 | -------------------------------------------------------------------------------- /docs/images/badfit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/badfit.png -------------------------------------------------------------------------------- /docs/images/goodfit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/goodfit.png -------------------------------------------------------------------------------- /docs/images/qq_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/qq_plot.png -------------------------------------------------------------------------------- /docs/images/flat_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/flat_plot.png -------------------------------------------------------------------------------- /docs/images/kmf_mcas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/kmf_mcas.png -------------------------------------------------------------------------------- /docs/images/add_at_risk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/add_at_risk.png -------------------------------------------------------------------------------- /docs/images/ci_show_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/ci_show_plot.png -------------------------------------------------------------------------------- /docs/images/coxph_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/coxph_plot.png -------------------------------------------------------------------------------- /docs/images/normal_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/normal_plot.png -------------------------------------------------------------------------------- /docs/images/invert_y_axis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/invert_y_axis.png 
-------------------------------------------------------------------------------- /docs/images/lcd_parametric.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lcd_parametric.png -------------------------------------------------------------------------------- /docs/images/lifetimes_mcas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifetimes_mcas.png -------------------------------------------------------------------------------- /docs/images/lls_democracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lls_democracy.png -------------------------------------------------------------------------------- /docs/images/quickstart_aaf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/quickstart_aaf.png -------------------------------------------------------------------------------- /docs/images/quickstart_kmf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/quickstart_kmf.png -------------------------------------------------------------------------------- /docs/images/lls_regime_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lls_regime_type.png -------------------------------------------------------------------------------- /docs/images/quickstart_multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/quickstart_multi.png -------------------------------------------------------------------------------- /docs/images/show_censors_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/show_censors_plot.png -------------------------------------------------------------------------------- /docs/images/survival_weibull.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_weibull.png -------------------------------------------------------------------------------- /reqs/travis-requirements.txt: -------------------------------------------------------------------------------- 1 | python-coveralls 2 | seaborn 3 | pytest-travis-fold 4 | dill 5 | -r dev-requirements.txt 6 | -------------------------------------------------------------------------------- /docs/images/lifelines_intro_lcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_lcd.png -------------------------------------------------------------------------------- /docs/images/quickstart_kmf_cdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/quickstart_kmf_cdf.png 
-------------------------------------------------------------------------------- /docs/images/weibull_aft_forest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/weibull_aft_forest.png -------------------------------------------------------------------------------- /lifelines/version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | __version__ = "0.21.0" 5 | -------------------------------------------------------------------------------- /docs/images/coxph_plot_quickstart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/coxph_plot_quickstart.png -------------------------------------------------------------------------------- /docs/images/quickstart_predict_aaf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/quickstart_predict_aaf.png -------------------------------------------------------------------------------- /docs/images/single_at_risk_plots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/single_at_risk_plots.png -------------------------------------------------------------------------------- /docs/images/waft_plot_quickstart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/waft_plot_quickstart.png -------------------------------------------------------------------------------- /docs/images/weibull_aft_two_models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/weibull_aft_two_models.png -------------------------------------------------------------------------------- /docs/images/weibull_extrapolation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/weibull_extrapolation.png -------------------------------------------------------------------------------- /docs/images/survival_regression_aaf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_regression_aaf.png -------------------------------------------------------------------------------- /reqs/base-requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.14.0 2 | scipy>=1.0 3 | pandas>=0.23.0 4 | matplotlib>=3.0 5 | bottleneck>=1.0 6 | autograd>=1.2 7 | -------------------------------------------------------------------------------- /docs/images/coxph_plot_covarite_groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/coxph_plot_covarite_groups.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_all_regimes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_all_regimes.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_kmf_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_kmf_curve.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_kmf_fitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_kmf_fitter.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_naf_fitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_naf_fitter.png -------------------------------------------------------------------------------- /docs/images/survival_regression_harper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_regression_harper.png -------------------------------------------------------------------------------- /docs/images/waltons_cumulative_hazard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/waltons_cumulative_hazard.png -------------------------------------------------------------------------------- /docs/images/waltons_survival_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/waltons_survival_function.png -------------------------------------------------------------------------------- /reqs/docs-requirements.txt: -------------------------------------------------------------------------------- 1 | -r dev-requirements.txt 2 | sphinx 3 | sphinx_rtd_theme 4 | nbsphinx 5 | jupyter_client 6 | nbconvert!=5.4 7 | ipykernel -------------------------------------------------------------------------------- /docs/images/lifelines_intro_multi_kmf_fitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_multi_kmf_fitter.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_naf_fitter_multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_naf_fitter_multi.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_naf_smooth_multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_naf_smooth_multi.png -------------------------------------------------------------------------------- /docs/images/survival_regression_conditioning.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_regression_conditioning.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_multi_kmf_fitter_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_multi_kmf_fitter_2.png -------------------------------------------------------------------------------- /docs/images/lifelines_intro_naf_smooth_multi_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/lifelines_intro_naf_smooth_multi_2.png -------------------------------------------------------------------------------- /docs/images/survival_analysis_intro_censoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_analysis_intro_censoring.png -------------------------------------------------------------------------------- /docs/images/weibull_aft_two_models_side_by_side.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/weibull_aft_two_models_side_by_side.png -------------------------------------------------------------------------------- /lifelines/datasets/static_test.csv: -------------------------------------------------------------------------------- 1 | id,t,E,var1,var2 2 | 1,4,1,-1,-1 3 | 2,3,1,-2,-2 4 | 3,3,0,-3,-3 5 | 4,4,1,-4,-4 6 | 5,2,1,-5,-5 7 | 6,0,1,-6,-6 8 | 7,2,1,-7,-7 9 | -------------------------------------------------------------------------------- /docs/images/survival_analysis_intro_censoring_revealed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_analysis_intro_censoring_revealed.png -------------------------------------------------------------------------------- /docs/images/survival_regression_conditioning_with_median.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canyon289/lifelines/master/docs/images/survival_regression_conditioning_with_median.png -------------------------------------------------------------------------------- /docs/lifelines.utils.rst: -------------------------------------------------------------------------------- 1 | lifelines.utils 2 | =============== 3 | 4 | .. automodule:: lifelines.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/lifelines.datasets.rst: -------------------------------------------------------------------------------- 1 | lifelines.datasets 2 | ================== 3 | 4 | 5 | .. automodule:: lifelines.datasets 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/lifelines.plotting.rst: -------------------------------------------------------------------------------- 1 | lifelines.plotting 2 | ================== 3 | 4 | 5 | .. 
automodule:: lifelines.plotting 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/lifelines.statistics.rst: -------------------------------------------------------------------------------- 1 | lifelines.statistics 2 | ====================== 3 | 4 | 5 | .. automodule:: lifelines.statistics 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /tests/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import pytest 4 | 5 | 6 | if __name__ == "__main__": 7 | # Exit with correct code 8 | sys.exit(pytest.main(["--pyargs", "lifelines.tests"] + sys.argv[1:])) 9 | -------------------------------------------------------------------------------- /lifelines/datasets/holly_molly_polly.tsv: -------------------------------------------------------------------------------- 1 | ID Status Stratum Start(days) Stop(days) tx T 2 | 0 M 1 1 0 100 1 100 3 | 1 M 1 2 100 105 1 5 4 | 2 H 1 1 0 30 0 30 5 | 3 H 1 2 30 50 0 20 6 | 4 P 1 1 0 20 0 20 7 | 5 P 1 2 20 60 0 40 8 | 6 P 1 3 60 85 0 25 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include MANIFEST.in 4 | 5 | include *.ipynb 6 | 7 | recursive-include lifelines * 8 | recursive-include datasets * 9 | recursive-include styles * 10 | 11 | recursive-exclude * *.py[co] 12 | -------------------------------------------------------------------------------- /docs/References.rst: -------------------------------------------------------------------------------- 1 | Reference library for *lifelines* 2 | ================================== 3 | 4 | .. toctree:: 5 | 6 | lifelines.fitters 7 | lifelines.utils 8 | lifelines.statistics 9 | lifelines.plotting 10 | lifelines.datasets 11 | -------------------------------------------------------------------------------- /reqs/dev-requirements.txt: -------------------------------------------------------------------------------- 1 | -r base-requirements.txt 2 | # installs lifelines as editable dependency in develop mode 3 | -e . 4 | pytest>=3.6 5 | pytest-icdiff;python_version > '3.5' 6 | coverage>=4.4 7 | pytest-cov 8 | pypandoc 9 | prospector[with_pyroma] 10 | pre-commit 11 | black;python_version > '3.5' 12 | dill 13 | statsmodels 14 | flaky 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | build 4 | dist 5 | *.egg-info 6 | *.iml 7 | 8 | # IDE files 9 | \.vscode/ 10 | 11 | # testing artifacts 12 | \.coverage 13 | \.pytest_cache/ 14 | 15 | # pipenv files 16 | Pipfile 17 | Pipfile.lock 18 | 19 | # Pyenv 20 | .python-version 21 | 22 | # Jupyter 23 | .ipynb_checkpoints 24 | 25 | # docs artifacts 26 | docs/_build/ 27 | 28 | # asv builds 29 | \.asv/ 30 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ### Examples 2 | 3 | In this folder are some examples of lifelines usage, some with and some without comments and context. You can see some common patterns using lifelines and survival analysis. 
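To give a flavour of those patterns before opening the scripts, here is a minimal sketch (not taken from any one example file) that fits and plots a Kaplan-Meier curve on the bundled Waltons dataset; the examples in this folder build on this kind of workflow:

```python
from lifelines import KaplanMeierFitter
from lifelines.datasets import load_waltons

df = load_waltons()                       # columns: T (duration), E (event observed), group
kmf = KaplanMeierFitter()
kmf.fit(df["T"], event_observed=df["E"])

kmf.survival_function_                    # DataFrame with the estimated survival function
kmf.plot()                                # survival curve with confidence intervals
```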
4 | 5 | 6 | #### Other examples 7 | 8 | - [nice tutorial to lifelines and survival analysis](https://github.com/chrisluedtke/data-science-journal/blob/master/07-Advanced-Regression/02_Survival_Analysis.ipynb) 9 | -------------------------------------------------------------------------------- /lifelines/datasets/psychiatric_patients.csv: -------------------------------------------------------------------------------- 1 | Age,T,C,sex 2 | 51,1,1,2 3 | 58,1,1,2 4 | 55,2,1,2 5 | 28,22,1,2 6 | 21,30,0,1 7 | 19,28,1,1 8 | 25,32,1,2 9 | 48,11,1,2 10 | 47,14,1,2 11 | 25,36,0,2 12 | 31,31,0,2 13 | 24,33,0,1 14 | 25,33,0,1 15 | 30,37,0,2 16 | 33,35,0,2 17 | 36,25,1,1 18 | 30,31,0,1 19 | 41,22,1,1 20 | 43,26,1,2 21 | 45,24,1,2 22 | 35,35,0,2 23 | 29,34,0,1 24 | 35,30,0,1 25 | 32,35,1,1 26 | 36,40,1,2 27 | 32,39,0,1 28 | -------------------------------------------------------------------------------- /lifelines/datasets/panel_test.csv: -------------------------------------------------------------------------------- 1 | id,t,E,var1,var2 2 | 1,1,0,0,1 3 | 1,2,0,0,1 4 | 1,3,0,4,3 5 | 1,4,1,8,4 6 | 2,1,0,1.2,1 7 | 2,2,0,1.2,2 8 | 2,3,0,1.2,2 9 | 3,1,0,0,1 10 | 3,2,1,1,2 11 | 4,1,0,0,1 12 | 4,2,0,1,2 13 | 4,3,0,1,3 14 | 4,4,0,2,4 15 | 4,5,1,2,5 16 | 5,1,0,1,-1 17 | 5,2,0,2,-1 18 | 5,3,0,3,-1 19 | 6,1,1,3,0 20 | 7,1,0,1,0 21 | 7,2,0,2,1 22 | 7,3,0,3,0 23 | 7,4,0,3,1 24 | 7,5,0,3,0 25 | 7,6,1,3,1 26 | 8,1,0,-1,0 27 | 8,2,1,1,0 28 | 9,1,0,1,1 29 | 9,2,0,2,2 30 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.0.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: check-ast 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: fix-encoding-pragma 10 | - id: mixed-line-ending 11 | - id: trailing-whitespace 12 | - repo: https://github.com/ambv/black 13 | rev: stable 14 | hooks: 15 | - id: black 16 | args: ["--line-length", "120"] 17 | -------------------------------------------------------------------------------- /lifelines/datasets/gehan.dat: -------------------------------------------------------------------------------- 1 | 2 6 1 2 | 2 6 1 3 | 2 6 1 4 | 2 6 0 5 | 2 7 1 6 | 2 9 0 7 | 2 10 1 8 | 2 10 0 9 | 2 11 0 10 | 2 13 1 11 | 2 16 1 12 | 2 17 0 13 | 2 19 0 14 | 2 20 0 15 | 2 22 1 16 | 2 23 1 17 | 2 25 0 18 | 2 32 0 19 | 2 32 0 20 | 2 34 0 21 | 2 35 0 22 | 1 1 1 23 | 1 1 1 24 | 1 2 1 25 | 1 2 1 26 | 1 3 1 27 | 1 4 1 28 | 1 4 1 29 | 1 5 1 30 | 1 5 1 31 | 1 8 1 32 | 1 8 1 33 | 1 8 1 34 | 1 8 1 35 | 1 11 1 36 | 1 11 1 37 | 1 12 1 38 | 1 12 1 39 | 1 15 1 40 | 1 17 1 41 | 1 22 1 42 | 1 23 1 43 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import pytest 4 | 5 | 6 | def pytest_runtest_setup(item): 7 | random_seed = np.random.randint(1000) 8 | print("Seed used in np.random.seed(): %d" % random_seed) 9 | np.random.seed(random_seed) 10 | 11 | 12 | def pytest_addoption(parser): 13 | parser.addoption("--block", action="store", default=True, help="Should plotting block or not.") 14 | 15 | 16 | @pytest.fixture 17 | def block(request): 18 | try: 19 | return request.config.getoption("--block") not in "False,false,no,0".split(",") 20 | except ValueError: 21 | return True 22 | 
-------------------------------------------------------------------------------- /lifelines/datasets/g3.csv: -------------------------------------------------------------------------------- 1 | no.,age,sex,histology,group,event,time 2 | 1,41,Female,Grade3,RIT,True,53 3 | 2,45,Female,Grade3,RIT,False,28 4 | 3,48,Male,Grade3,RIT,False,69 5 | 4,54,Male,Grade3,RIT,False,58 6 | 5,40,Female,Grade3,RIT,False,54 7 | 6,31,Male,Grade3,RIT,True,25 8 | 7,53,Male,Grade3,RIT,False,51 9 | 8,49,Male,Grade3,RIT,False,61 10 | 9,36,Male,Grade3,RIT,False,57 11 | 10,52,Male,Grade3,RIT,False,57 12 | 11,57,Male,Grade3,RIT,False,50 13 | 1,27,Male,Grade3,Control,True,34 14 | 2,32,Male,Grade3,Control,True,32 15 | 3,53,Female,Grade3,Control,True,9 16 | 4,46,Male,Grade3,Control,True,19 17 | 5,33,Female,Grade3,Control,False,50 18 | 6,19,Female,Grade3,Control,False,48 19 | -------------------------------------------------------------------------------- /perf_tests/aaf_perf_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # aalen additive 3 | 4 | 5 | if __name__ == "__main__": 6 | import pandas as pd 7 | import numpy as np 8 | import time 9 | 10 | from lifelines.fitters.aalen_additive_fitter import AalenAdditiveFitter 11 | from lifelines.datasets import load_rossi 12 | 13 | df = load_rossi() 14 | df = pd.concat([df] * 1) 15 | # df['week'] = np.random.exponential(size=df.shape[0]) 16 | aaf = AalenAdditiveFitter() 17 | start_time = time.time() 18 | aaf.fit(df, duration_col="week", event_col="arrest") 19 | print("--- %s seconds ---" % (time.time() - start_time)) 20 | aaf.print_summary(5) 21 | -------------------------------------------------------------------------------- /perf_tests/cp_perf_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # cox regression 3 | 4 | 5 | if __name__ == "__main__": 6 | import pandas as pd 7 | import time 8 | import numpy as np 9 | 10 | from lifelines import CoxPHFitter 11 | from lifelines.datasets import load_rossi 12 | 13 | df = load_rossi() 14 | df = pd.concat([df] * 16) 15 | # df = df.reset_index() 16 | # df['week'] = np.random.exponential(1, size=df.shape[0]) 17 | cp = CoxPHFitter() 18 | start_time = time.time() 19 | cp.fit(df, duration_col="week", event_col="arrest", batch_mode=True) 20 | print("--- %s seconds ---" % (time.time() - start_time)) 21 | cp.print_summary() 22 | -------------------------------------------------------------------------------- /perf_tests/lognormal_perf_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # aalen additive 3 | 4 | 5 | if __name__ == "__main__": 6 | import pandas as pd 7 | import numpy as np 8 | import time 9 | 10 | from lifelines import LogNormalFitter 11 | 12 | np.random.seed(1) 13 | N = 250000 14 | mu = 3 * np.random.randn() 15 | sigma = np.random.uniform(0.1, 3.0) 16 | 17 | X, C = np.exp(sigma * np.random.randn(N) + mu), np.exp(np.random.randn(N) + mu) 18 | E = X <= C 19 | T = np.minimum(X, C) 20 | 21 | lnf = LogNormalFitter() 22 | start_time = time.time() 23 | lnf.fit(T, E) 24 | print("--- %s seconds ---" % (time.time() - start_time)) 25 | lnf.print_summary(5) 26 | -------------------------------------------------------------------------------- /perf_tests/ctv_perf_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | if __name__ == "__main__": 3 | import time 4 | 
import pandas as pd 5 | from lifelines import CoxTimeVaryingFitter 6 | from lifelines.datasets import load_rossi 7 | from lifelines.utils import to_long_format 8 | 9 | df = load_rossi() 10 | df = pd.concat([df] * 20) 11 | df = df.reset_index() 12 | df = to_long_format(df, duration_col="week") 13 | ctv = CoxTimeVaryingFitter() 14 | start_time = time.time() 15 | ctv.fit(df, id_col="index", event_col="arrest", start_col="start", stop_col="stop") 16 | time_took = time.time() - start_time 17 | print("--- %s seconds ---" % time_took) 18 | ctv.print_summary() 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | init: 2 | ifeq ($(TRAVIS), true) 3 | pip install -r reqs/travis-requirements.txt 4 | pip install pandas==${PANDAS_VERSION} 5 | pip list --local 6 | else 7 | pip install -r reqs/dev-requirements.txt 8 | pre-commit install 9 | endif 10 | 11 | test: 12 | py.test -rfs --cov=lifelines --block=False --cov-report term-missing 13 | 14 | lint: 15 | ifeq ($(TRAVIS_PYTHON_VERSION), 2.7) 16 | echo "Skip linting for Python2.7" 17 | else 18 | black lifelines/ -l 120 --fast 19 | black tests/ -l 120 --fast 20 | prospector --output-format grouped 21 | endif 22 | 23 | check_format: 24 | ifeq ($(TRAVIS_PYTHON_VERSION), 3.6) 25 | black . --check --line-length 120 26 | else 27 | echo "Only check format on Python3.6" 28 | endif 29 | 30 | pre: 31 | pre-commit run --all-files 32 | -------------------------------------------------------------------------------- /lifelines/datasets/anderson.csv: -------------------------------------------------------------------------------- 1 | t status sex logWBC Rx 2 | 35 0 1 1.45 0 3 | 34 0 1 1.47 0 4 | 32 0 1 2.2 0 5 | 32 0 1 2.53 0 6 | 25 0 1 1.78 0 7 | 23 1 1 2.57 0 8 | 22 1 1 2.32 0 9 | 20 0 1 2.01 0 10 | 19 0 0 2.05 0 11 | 17 0 0 2.16 0 12 | 16 1 1 3.6 0 13 | 13 1 0 2.88 0 14 | 11 0 0 2.6 0 15 | 10 0 0 2.7 0 16 | 10 1 0 2.96 0 17 | 9 0 0 2.8 0 18 | 7 1 0 4.43 0 19 | 6 0 0 3.2 0 20 | 6 1 0 2.31 0 21 | 6 1 1 4.06 0 22 | 6 1 0 3.28 0 23 | 23 1 1 1.97 1 24 | 22 1 0 2.73 1 25 | 17 1 0 2.95 1 26 | 15 1 0 2.3 1 27 | 12 1 0 1.5 1 28 | 12 1 0 3.06 1 29 | 11 1 0 3.49 1 30 | 11 1 0 2.12 1 31 | 8 1 0 3.52 1 32 | 8 1 0 3.05 1 33 | 8 1 0 2.32 1 34 | 8 1 1 3.26 1 35 | 5 1 1 3.49 1 36 | 5 1 0 3.97 1 37 | 4 1 1 4.36 1 38 | 4 1 1 2.42 1 39 | 3 1 1 4.01 1 40 | 2 1 1 4.91 1 41 | 2 1 1 4.48 1 42 | 1 1 1 2.8 1 43 | 1 1 1 5 1 44 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: pip 3 | dist: trusty 4 | python: 5 | - "3.5" 6 | - "3.6" 7 | env: 8 | - export PANDAS_VERSION=0.23.4 9 | - export PANDAS_VERSION=0.24.1 10 | # Enable newer 3.7 without globally enabling sudo and dist: xenial for other build jobs 11 | matrix: 12 | include: 13 | - python: 3.7 14 | dist: xenial 15 | sudo: true 16 | env: export PANDAS_VERSION=0.24.1 17 | - python: 3.7 18 | dist: xenial 19 | sudo: true 20 | env: export PANDAS_VERSION=0.23.4 21 | before_install: 22 | - ls 23 | # - sudo apt-get update 24 | install: "make" 25 | script: 26 | # enforce formatting 27 | - make check_format 28 | # command to run tests 29 | - make test 30 | after_success: 31 | - coveralls 32 | # Don't want notifications 33 | notifications: 34 | email: false 35 | -------------------------------------------------------------------------------- 
/perf_tests/weibull_aft_perf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # weibull aft 3 | 4 | 5 | if __name__ == "__main__": 6 | import pandas as pd 7 | import time 8 | import numpy as np 9 | 10 | from lifelines import WeibullAFTFitter 11 | from lifelines.datasets import load_rossi 12 | 13 | df = load_rossi() 14 | df = pd.concat([df] * 1) 15 | 16 | df["start"] = df["week"] 17 | df["stop"] = np.where(df["arrest"], df["start"], np.inf) 18 | df = df.drop("week", axis=1) 19 | 20 | wp = WeibullAFTFitter() 21 | start_time = time.time() 22 | print(df.head()) 23 | wp.fit_interval_censoring(df, start_col="start", stop_col="stop", event_col="arrest") 24 | print("--- %s seconds ---" % (time.time() - start_time)) 25 | wp.print_summary() 26 | 27 | wp.fit_right_censoring(load_rossi(), "week", event_col="arrest") 28 | wp.print_summary() 29 | -------------------------------------------------------------------------------- /perf_tests/weibull_perf_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | if __name__ == "__main__": 4 | import pandas as pd 5 | import numpy as np 6 | import time 7 | 8 | from lifelines import WeibullFitter 9 | 10 | data = ( 11 | [{"start": 0, "stop": 2, "E": False}] * (1000 - 376) 12 | + [{"start": 2, "stop": 5, "E": False}] * (376 - 82) 13 | + [{"start": 5, "stop": 10, "E": False}] * (82 - 7) 14 | + [{"start": 10, "stop": 1e10, "E": False}] * 7 15 | ) 16 | 17 | df = pd.DataFrame.from_records(data) 18 | print(df) 19 | 20 | df = df.groupby(["start", "stop", "E"]).size().reset_index() 21 | print(df) 22 | 23 | wb = WeibullFitter() 24 | start_time = time.time() 25 | wb.fit_interval_censoring(df["start"], df["stop"], df["E"], weights=df[0]) 26 | print("--- %s seconds ---" % (time.time() - start_time)) 27 | wb.print_summary(5) 28 | -------------------------------------------------------------------------------- /lifelines/utils/logsf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from scipy.stats import norm as _scipy_norm 4 | import autograd.numpy as np 5 | from autograd.scipy.stats import norm 6 | from autograd.extend import primitive, defvjp 7 | from autograd.numpy.numpy_vjps import unbroadcast_f 8 | 9 | # TODO: next release of autograd will have this built in. 
10 | 11 | logsf = primitive(_scipy_norm.logsf) 12 | 13 | defvjp( 14 | logsf, 15 | lambda ans, x, loc=0.0, scale=1.0: unbroadcast_f( 16 | x, lambda g: -g * np.exp(norm.logpdf(x, loc, scale) - logsf(x, loc, scale)) 17 | ), 18 | lambda ans, x, loc=0.0, scale=1.0: unbroadcast_f( 19 | loc, lambda g: g * np.exp(norm.logpdf(x, loc, scale) - logsf(x, loc, scale)) 20 | ), 21 | lambda ans, x, loc=0.0, scale=1.0: unbroadcast_f( 22 | scale, lambda g: g * np.exp(norm.logpdf(x, loc, scale) - logsf(x, loc, scale)) * (x - loc) / scale 23 | ), 24 | ) 25 | -------------------------------------------------------------------------------- /experiments/aalen_and_cook_simulation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from scipy.stats import weibull_min 4 | import pandas as pd 5 | from lifelines import WeibullAFTFitter, CoxPHFitter 6 | 7 | # This is an implementation of https://uwspace.uwaterloo.ca/bitstream/handle/10012/10265/Cook_Richard-10265.pdf 8 | 9 | N = 50000 10 | p = 0.5 11 | bX = np.log(0.5) 12 | bZ = np.log(4) 13 | 14 | Z = np.random.binomial(1, p, size=N) 15 | X = np.random.binomial(1, 0.5, size=N) 16 | X_ = 20000 + 10 * np.random.randn(N) 17 | 18 | W = weibull_min.rvs(1, scale=1, loc=0, size=N) 19 | 20 | Y = bX * X + bZ * Z + np.log(W) 21 | T = np.exp(Y) 22 | 23 | ####################################### 24 | 25 | df = pd.DataFrame({"T": T, "x": X, "x_": X_}) 26 | 27 | 28 | wf = WeibullAFTFitter().fit(df, "T") 29 | wf.print_summary(4) 30 | 31 | 32 | cph = CoxPHFitter().fit(df, "T", show_progress=True, step_size=1.0) 33 | cph.print_summary(4) 34 | -------------------------------------------------------------------------------- /lifelines/datasets/lymphoma.csv: -------------------------------------------------------------------------------- 1 | Stage_group,Time,Censor 2 | 1,6,1 3 | 1,19,1 4 | 1,32,1 5 | 1,42,1 6 | 1,42,1 7 | 1,43,0 8 | 1,94,1 9 | 1,126,0 10 | 1,169,0 11 | 1,207,1 12 | 1,211,0 13 | 1,227,0 14 | 1,253,1 15 | 1,255,0 16 | 1,270,0 17 | 1,310,0 18 | 1,316,0 19 | 1,335,0 20 | 1,346,0 21 | 2,4,1 22 | 2,6,1 23 | 2,10,1 24 | 2,11,1 25 | 2,11,1 26 | 2,11,1 27 | 2,13,1 28 | 2,17,1 29 | 2,20,1 30 | 2,20,1 31 | 2,21,1 32 | 2,22,1 33 | 2,24,1 34 | 2,24,1 35 | 2,29,1 36 | 2,30,1 37 | 2,30,1 38 | 2,31,1 39 | 2,33,1 40 | 2,34,1 41 | 2,35,1 42 | 2,39,1 43 | 2,40,1 44 | 2,41,0 45 | 2,43,0 46 | 2,45,1 47 | 2,46,1 48 | 2,50,1 49 | 2,56,1 50 | 2,61,0 51 | 2,61,0 52 | 2,63,1 53 | 2,68,1 54 | 2,82,1 55 | 2,85,1 56 | 2,88,1 57 | 2,89,1 58 | 2,90,1 59 | 2,93,1 60 | 2,104,1 61 | 2,110,1 62 | 2,134,1 63 | 2,137,1 64 | 2,160,0 65 | 2,169,1 66 | 2,171,1 67 | 2,173,1 68 | 2,175,1 69 | 2,184,1 70 | 2,201,1 71 | 2,222,1 72 | 2,235,0 73 | 2,247,0 74 | 2,260,0 75 | 2,284,0 76 | 2,290,0 77 | 2,291,0 78 | 2,302,0 79 | 2,304,0 80 | 2,341,0 81 | 2,345,0 82 | -------------------------------------------------------------------------------- /.prospector.yaml: -------------------------------------------------------------------------------- 1 | strictness: medium 2 | 3 | pylint: 4 | options: 5 | bad-names: foo,baz,toto,tutu,tata,data 6 | # max-args default = 5 7 | max-args: 15 8 | # max-locals default = 15 9 | max-locals: 50 10 | # max-branches default = 15 11 | max-branches: 16 12 | disable: 13 | - line-too-long 14 | - protected-access 15 | - no-value-for-parameter 16 | - assignment-from-no-return 17 | - invalid-unary-operand-type 18 | # remove if python2.7 support is dropped 19 | - useless-object-inheritance 20 | - 
old-style-class 21 | 22 | pyflakes: 23 | disable: 24 | - F401 25 | - F841 26 | # let pylint used-before-assignment handle this 27 | - F821 28 | 29 | pep8: 30 | options: 31 | max-line-length: 120 32 | disable: 33 | - E501 34 | - E241 35 | 36 | mccabe: 37 | options: 38 | # max-complexity default = 10 39 | max-complexity: 23 40 | 41 | pyroma: 42 | run: true 43 | 44 | pep257: 45 | run: false 46 | 47 | ignore-paths: 48 | - build 49 | - benchmarks 50 | -------------------------------------------------------------------------------- /tests/utils/test_btree.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import pytest 5 | import numpy as np 6 | 7 | from lifelines.utils.btree import _BTree as BTree 8 | 9 | 10 | def test_btree(): 11 | t = BTree(np.arange(10)) 12 | for i in range(10): 13 | assert t.rank(i) == (0, 0) 14 | 15 | assert len(t) == 0 16 | t.insert(5) 17 | t.insert(6) 18 | t.insert(6) 19 | t.insert(0) 20 | t.insert(9) 21 | assert len(t) == 5 22 | 23 | assert t.rank(0) == (0, 1) 24 | assert t.rank(0.5) == (1, 0) 25 | assert t.rank(4.5) == (1, 0) 26 | assert t.rank(5) == (1, 1) 27 | assert t.rank(5.5) == (2, 0) 28 | assert t.rank(6) == (2, 2) 29 | assert t.rank(6.5) == (4, 0) 30 | assert t.rank(8.5) == (4, 0) 31 | assert t.rank(9) == (4, 1) 32 | assert t.rank(9.5) == (5, 0) 33 | 34 | for i in range(1, 32): 35 | BTree(np.arange(i)) 36 | 37 | with pytest.raises(ValueError): 38 | # This has to go last since it screws up the counts 39 | t.insert(5.5) 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Cameron Davidson-Pilon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/test_generate_datasets.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | 4 | import pytest 5 | import matplotlib.pyplot as plt 6 | 7 | from lifelines import NelsonAalenFitter, KaplanMeierFitter 8 | from lifelines.generate_datasets import exponential_survival_data 9 | 10 | 11 | def test_exponential_data_sets_correct_censor(): 12 | N = 20000 13 | censorship = 0.2 14 | T, C = exponential_survival_data(N, censorship, scale=10) 15 | assert abs(C.mean() - (1 - censorship)) < 0.02 16 | 17 | 18 | @pytest.mark.skipif("DISPLAY" not in os.environ, reason="requires display") 19 | def test_exponential_data_sets_fit(): 20 | N = 20000 21 | T, C = exponential_survival_data(N, 0.2, scale=10) 22 | naf = NelsonAalenFitter() 23 | naf.fit(T, C).plot() 24 | plt.title("Should be a linear with slope = 0.1") 25 | 26 | 27 | @pytest.mark.skipif("DISPLAY" not in os.environ, reason="requires display") 28 | def test_kmf_minimum_observation_bias(): 29 | N = 250 30 | kmf = KaplanMeierFitter() 31 | T, C = exponential_survival_data(N, 0.1, scale=10) 32 | B = 0.01 * T 33 | kmf.fit(T, C, entry=B) 34 | kmf.plot() 35 | plt.title("Should have larger variances in the tails") 36 | -------------------------------------------------------------------------------- /experiments/detection_limits.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Experimenting with some data generation and inference for left censorship with one or more 4 | minimum detectable limits. 5 | 6 | Recall that MLE bias is equal to 0 up to the order 1/sqrt(n), so we expect that for 7 | small n, we will see a bias. 
8 | 9 | """ 10 | import numpy as np 11 | from lifelines import WeibullFitter 12 | 13 | 14 | def one_detection_limit(N, fraction_below_limit): 15 | 16 | T_actual = 0.5 * np.random.weibull(1, size=N) 17 | 18 | MIN_1 = np.percentile(T_actual, fraction_below_limit) 19 | 20 | T = np.maximum(MIN_1, T_actual) 21 | E = T_actual > MIN_1 22 | 23 | wf = WeibullFitter().fit(T, E, left_censorship=True) 24 | return wf 25 | 26 | 27 | def three_detection_limit(N): 28 | 29 | T_actual = 0.5 * np.random.weibull(5, size=N) 30 | 31 | MIN_0 = np.percentile(T_actual, 5) 32 | MIN_1 = np.percentile(T_actual, 10) 33 | MIN_2 = np.percentile(T_actual, 30) 34 | MIN_3 = np.percentile(T_actual, 50) 35 | 36 | T = T_actual.copy() 37 | ix = np.random.randint(4, size=N) 38 | 39 | T = np.where(ix == 0, np.maximum(T, MIN_0), T) 40 | T = np.where(ix == 1, np.maximum(T, MIN_1), T) 41 | T = np.where(ix == 2, np.maximum(T, MIN_2), T) 42 | T = np.where(ix == 3, np.maximum(T, MIN_3), T) 43 | E = T_actual == T 44 | 45 | wf = WeibullFitter().fit(T, E, left_censorship=True) 46 | return wf 47 | 48 | 49 | # biased 50 | np.mean([three_detection_limit(50).rho_ for _ in range(1000)]) 51 | 52 | 53 | # less biased 54 | np.mean([three_detection_limit(500).rho_ for _ in range(1000)]) 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | 4 | from setuptools import setup, find_packages 5 | 6 | 7 | def filepath(fname): 8 | return os.path.join(os.path.dirname(__file__), fname) 9 | 10 | 11 | exec(compile(open("lifelines/version.py").read(), "lifelines/version.py", "exec")) 12 | 13 | with open("README.md") as f: 14 | long_description = f.read() 15 | 16 | setup( 17 | name="lifelines", 18 | version=__version__, 19 | author="Cameron Davidson-Pilon", 20 | author_email="cam.davidson.pilon@gmail.com", 21 | description="Survival analysis in Python, including Kaplan Meier, Nelson Aalen and regression", 22 | license="MIT", 23 | keywords="survival analysis statistics data analysis", 24 | url="https://github.com/CamDavidsonPilon/lifelines", 25 | packages=find_packages(), 26 | python_requires=">=3.5", 27 | long_description=long_description, 28 | long_description_content_type="text/markdown", 29 | classifiers=[ 30 | "Development Status :: 4 - Beta", 31 | "License :: OSI Approved :: MIT License", 32 | "Programming Language :: Python", 33 | "Programming Language :: Python :: 3.5", 34 | "Programming Language :: Python :: 3.6", 35 | "Programming Language :: Python :: 3.7", 36 | "Topic :: Scientific/Engineering", 37 | ], 38 | install_requires=[ 39 | "numpy>=1.6.0", 40 | "scipy>=1.0", 41 | "pandas>=0.23.0", 42 | "matplotlib>=3.0", 43 | "bottleneck>=1.0", 44 | "autograd>=1.2", 45 | ], 46 | package_data={"lifelines": ["../README.md", "../README.txt", "../LICENSE", "../MANIFEST.in", "datasets/*"]}, 47 | ) 48 | -------------------------------------------------------------------------------- /experiments/left_censoring_experiments.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import scipy 4 | from matplotlib import pyplot as plt 5 | from lifelines import WeibullFitter, KaplanMeierFitter, LogNormalFitter, LogLogisticFitter 6 | from lifelines.plotting import left_censorship_cdf_plot, qq_plot 7 | 8 | plt.style.use("bmh") 9 | 10 | 11 | N = 2500 12 | 13 | T_actual = scipy.stats.fisk(8, 0, 1).rvs(N) 14 | 15 | MIN_0 = 
np.percentile(T_actual, 5) 16 | MIN_1 = np.percentile(T_actual, 10) 17 | MIN_2 = np.percentile(T_actual, 30) 18 | MIN_3 = np.percentile(T_actual, 50) 19 | 20 | T = T_actual.copy() 21 | ix = np.random.randint(4, size=N) 22 | 23 | T = np.where(ix == 0, np.maximum(T, MIN_0), T) 24 | T = np.where(ix == 1, np.maximum(T, MIN_1), T) 25 | T = np.where(ix == 2, np.maximum(T, MIN_2), T) 26 | T = np.where(ix == 3, np.maximum(T, MIN_3), T) 27 | E = T_actual == T 28 | 29 | fig, axes = plt.subplots(2, 2, figsize=(9, 5)) 30 | axes = axes.reshape(4) 31 | 32 | for i, model in enumerate([WeibullFitter(), KaplanMeierFitter(), LogNormalFitter(), LogLogisticFitter()]): 33 | if isinstance(model, KaplanMeierFitter): 34 | model.fit(T, E, left_censorship=True, label=model.__class__.__name__) 35 | else: 36 | model.fit(T, E, left_censorship=True, label=model.__class__.__name__) 37 | 38 | model.plot_cumulative_density(ax=axes[i]) 39 | plt.tight_layout() 40 | 41 | for i, model in enumerate([WeibullFitter(), LogNormalFitter(), LogLogisticFitter()]): 42 | model.fit(T, E, left_censorship=True) 43 | fig, axes = plt.subplots(2, 1, figsize=(8, 6)) 44 | 45 | left_censorship_cdf_plot(model, ax=axes[0]) 46 | qq_plot(model, ax=axes[1]) 47 | 48 | 49 | plt.show() 50 | -------------------------------------------------------------------------------- /lifelines/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: skip-file 3 | 4 | from lifelines.fitters.weibull_fitter import WeibullFitter 5 | from lifelines.fitters.exponential_fitter import ExponentialFitter 6 | from lifelines.fitters.nelson_aalen_fitter import NelsonAalenFitter 7 | from lifelines.fitters.kaplan_meier_fitter import KaplanMeierFitter 8 | from lifelines.fitters.breslow_fleming_harrington_fitter import BreslowFlemingHarringtonFitter 9 | from lifelines.fitters.coxph_fitter import CoxPHFitter 10 | from lifelines.fitters.cox_time_varying_fitter import CoxTimeVaryingFitter 11 | from lifelines.fitters.aalen_additive_fitter import AalenAdditiveFitter 12 | from lifelines.fitters.aalen_johansen_fitter import AalenJohansenFitter 13 | from lifelines.fitters.log_normal_fitter import LogNormalFitter 14 | from lifelines.fitters.log_logistic_fitter import LogLogisticFitter 15 | from lifelines.fitters.piecewise_exponential_fitter import PiecewiseExponentialFitter 16 | from lifelines.fitters.weibull_aft_fitter import WeibullAFTFitter 17 | from lifelines.fitters.log_logistic_aft_fitter import LogLogisticAFTFitter 18 | from lifelines.fitters.log_normal_aft_fitter import LogNormalAFTFitter 19 | 20 | 21 | from lifelines.version import __version__ 22 | 23 | __all__ = [ 24 | "__version__", 25 | "KaplanMeierFitter", 26 | "NelsonAalenFitter", 27 | "AalenAdditiveFitter", 28 | "BreslowFlemingHarringtonFitter", 29 | "CoxPHFitter", 30 | "WeibullFitter", 31 | "ExponentialFitter", 32 | "CoxTimeVaryingFitter", 33 | "AalenJohansenFitter", 34 | "LogNormalFitter", 35 | "LogLogisticFitter", 36 | "WeibullAFTFitter", 37 | "LogLogisticAFTFitter", 38 | "LogNormalAFTFitter", 39 | "PiecewiseExponentialFitter", 40 | ] 41 | -------------------------------------------------------------------------------- /lifelines/datasets/larynx.csv: -------------------------------------------------------------------------------- 1 | time,age,death,Stage II,Stage III,Stage IV 2 | 0.6,77,1,0,0,0 3 | 1.3,53,1,0,0,0 4 | 2.4,45,1,0,0,0 5 | 2.5,57,0,0,0,0 6 | 3.2,58,1,0,0,0 7 | 3.2,51,0,0,0,0 8 | 3.3,76,1,0,0,0 9 | 3.3,63,0,0,0,0 10 | 3.5,43,1,0,0,0 
11 | 3.5,60,1,0,0,0 12 | 4.0,52,1,0,0,0 13 | 4.0,63,1,0,0,0 14 | 4.3,86,1,0,0,0 15 | 4.5,48,0,0,0,0 16 | 4.5,68,0,0,0,0 17 | 5.3,81,1,0,0,0 18 | 5.5,70,0,0,0,0 19 | 5.9,58,0,0,0,0 20 | 5.9,47,0,0,0,0 21 | 6.0,75,1,0,0,0 22 | 6.1,77,0,0,0,0 23 | 6.2,64,0,0,0,0 24 | 6.4,77,1,0,0,0 25 | 6.5,67,1,0,0,0 26 | 6.5,79,0,0,0,0 27 | 6.7,61,0,0,0,0 28 | 7.0,66,0,0,0,0 29 | 7.4,68,1,0,0,0 30 | 7.4,73,0,0,0,0 31 | 8.1,56,0,0,0,0 32 | 8.1,73,0,0,0,0 33 | 9.6,58,0,0,0,0 34 | 10.7,68,0,0,0,0 35 | 0.2,86,1,1,0,0 36 | 1.8,64,1,1,0,0 37 | 2.0,63,1,1,0,0 38 | 2.2,71,0,1,0,0 39 | 2.6,67,0,1,0,0 40 | 3.3,51,0,1,0,0 41 | 3.6,70,1,1,0,0 42 | 3.6,72,0,1,0,0 43 | 4.0,81,1,1,0,0 44 | 4.3,47,0,1,0,0 45 | 4.3,64,0,1,0,0 46 | 5.0,66,0,1,0,0 47 | 6.2,74,1,1,0,0 48 | 7.0,62,1,1,0,0 49 | 7.5,50,0,1,0,0 50 | 7.6,53,0,1,0,0 51 | 9.3,61,0,1,0,0 52 | 0.3,49,1,0,1,0 53 | 0.3,71,1,0,1,0 54 | 0.5,57,1,0,1,0 55 | 0.7,79,1,0,1,0 56 | 0.8,82,1,0,1,0 57 | 1.0,49,1,0,1,0 58 | 1.3,60,1,0,1,0 59 | 1.6,64,1,0,1,0 60 | 1.8,74,1,0,1,0 61 | 1.9,72,1,0,1,0 62 | 1.9,53,1,0,1,0 63 | 3.2,54,1,0,1,0 64 | 3.5,81,1,0,1,0 65 | 3.7,52,0,0,1,0 66 | 4.5,66,0,0,1,0 67 | 4.8,54,0,0,1,0 68 | 4.8,63,0,0,1,0 69 | 5.0,59,1,0,1,0 70 | 5.0,49,0,0,1,0 71 | 5.1,69,0,0,1,0 72 | 6.3,70,1,0,1,0 73 | 6.4,65,1,0,1,0 74 | 6.5,65,0,0,1,0 75 | 7.8,68,1,0,1,0 76 | 8.0,78,0,0,1,0 77 | 9.3,69,0,0,1,0 78 | 10.1,51,0,0,1,0 79 | 0.1,65,1,0,0,1 80 | 0.3,71,1,0,0,1 81 | 0.4,76,1,0,0,1 82 | 0.8,65,1,0,0,1 83 | 0.8,78,1,0,0,1 84 | 1.0,41,1,0,0,1 85 | 1.5,68,1,0,0,1 86 | 2.0,69,1,0,0,1 87 | 2.3,62,1,0,0,1 88 | 2.9,74,0,0,0,1 89 | 3.6,71,1,0,0,1 90 | 3.8,84,1,0,0,1 91 | 4.3,48,0,0,0,1 92 | -------------------------------------------------------------------------------- /perf_tests/batch_vs_single.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from time import time 3 | import pandas as pd 4 | import numpy as np 5 | from lifelines.datasets import load_rossi 6 | from lifelines import CoxPHFitter 7 | import statsmodels.api as sm 8 | 9 | # This compares the batch algorithm (in CTV) vs the single iteration algorithm (original in CPH) 10 | # N vs (% ties == unique(T) / N) 11 | 12 | 13 | ROSSI_ROWS = 432 14 | results = {} 15 | 16 | 17 | for n_copies in [1, 2, 4, 6, 8, 10, 13, 17, 20, 25]: 18 | 19 | # lower percents means more ties. 
20 | # original rossi dataset has 0.113 21 | for fraction in np.linspace(0.01, 0.99, 15): 22 | print(n_copies, fraction) 23 | 24 | df = pd.concat([load_rossi()] * n_copies) 25 | n_unique_durations = int(df.shape[0] * fraction) + 1 26 | unique_durations = np.round(np.random.exponential(10, size=n_unique_durations), 5) 27 | 28 | df["week"] = np.tile(unique_durations, int(np.ceil(1 / fraction)))[: df.shape[0]] 29 | 30 | batch_results = [] 31 | for _ in range(3): 32 | cph_batch = CoxPHFitter() 33 | start_time = time() 34 | cph_batch.fit(df, "week", "arrest", batch_mode=True) 35 | batch_results.append(time() - start_time) 36 | 37 | single_results = [] 38 | for _ in range(3): 39 | cph_single = CoxPHFitter() 40 | start_time = time() 41 | cph_single.fit(df, "week", "arrest", batch_mode=False) 42 | single_results.append(time() - start_time) 43 | 44 | batch_time = min(batch_results) 45 | single_time = min(single_results) 46 | print({"batch": batch_time, "single": single_time}) 47 | results[(n_copies * ROSSI_ROWS, fraction)] = {"batch": batch_time, "single": single_time} 48 | 49 | results = pd.DataFrame(results).T.sort_index() 50 | results = results.reset_index() 51 | results = results.rename(columns={"level_0": "N", "level_1": "frac"}) 52 | results["ratio"] = results["batch"] / results["single"] 53 | 54 | print(results) 55 | results.to_csv("perf_results.csv", index=False) 56 | 57 | 58 | results["N * frac"] = results["N"] * results["frac"] 59 | 60 | X = results[["N", "frac", "N * frac"]] 61 | X = sm.add_constant(X) 62 | 63 | Y = results["ratio"] 64 | 65 | 66 | model = sm.OLS(Y, X).fit() 67 | print(model.summary()) 68 | print(model.params) 69 | -------------------------------------------------------------------------------- /lifelines/datasets/multicenter_aids_cohort.tsv: -------------------------------------------------------------------------------- 1 | i AIDSY W T D 2 | 1 1990.425 4.575 7.575 0 3 | 2 1991.250 3.750 6.750 0 4 | 3 1992.014 2.986 5.986 0 5 | 4 1992.030 2.970 5.970 0 6 | 5 1992.072 2.928 5.928 0 7 | 6 1992.220 2.780 4.688 1 8 | 7 1992.374 2.626 5.626 0 9 | 8 1992.389 2.611 5.611 0 10 | 9 1992.450 2.550 5.550 0 11 | 10 1992.653 2.347 5.347 0 12 | 11 1992.825 2.175 3.842 0 13 | 12 1992.906 2.094 3.655 1 14 | 13 1992.911 2.089 3.062 1 15 | 14 1992.958 2.042 5.042 0 16 | 15 1993.264 1.736 4.653 0 17 | 16 1993.384 1.616 2.729 1 18 | 17 1993.436 1.564 4.564 0 19 | 18 1993.439 1.561 2.897 1 20 | 19 1993.444 1.556 4.556 0 21 | 20 1993.503 1.497 2.024 1 22 | 21 1993.533 1.467 2.400 1 23 | 22 1993.637 1.363 3.043 1 24 | 23 1993.700 1.300 4.300 0 25 | 24 1994.081 0.919 1.169 1 26 | 25 1994.137 0.863 3.863 0 27 | 26 1994.189 0.811 3.811 0 28 | 27 1994.212 0.788 3.788 0 29 | 28 1994.228 0.772 3.772 0 30 | 29 1994.253 0.747 1.894 1 31 | 30 1994.358 0.642 1.951 1 32 | 31 1994.538 0.462 3.462 0 33 | 32 1994.664 0.336 3.336 0 34 | 33 1994.708 0.292 1.125 0 35 | 34 1994.734 0.266 1.258 1 36 | 35 1994.742 0.258 3.258 0 37 | 36 1994.798 0.202 3.202 0 38 | 37 1994.814 0.186 3.186 0 39 | 38 1994.836 0.164 0.973 1 40 | 39 1994.872 0.128 3.128 0 41 | 40 1994.903 0.097 1.794 1 42 | 41 1994.933 0.067 0.962 1 43 | 42 1994.950 0.050 1.255 1 44 | 43 1995.059 0.000 2.941 0 45 | 44 1995.070 0.000 1.619 1 46 | 45 1995.103 0.000 2.897 0 47 | 46 1995.169 0.000 1.619 1 48 | 47 1995.178 0.000 2.456 1 49 | 48 1995.189 0.000 1.752 1 50 | 49 1995.202 0.000 2.798 0 51 | 50 1995.231 0.000 2.769 0 52 | 51 1995.239 0.000 0.791 1 53 | 52 1995.247 0.000 2.753 0 54 | 53 1995.280 0.000 2.720 0 55 | 54 1995.286 0.000 1.881 0 56 | 55 
1995.286 0.000 2.714 0 57 | 56 1995.309 0.000 1.322 1 58 | 57 1995.342 0.000 2.658 0 59 | 58 1995.384 0.000 1.216 1 60 | 59 1995.478 0.000 0.269 1 61 | 60 1995.481 0.000 2.500 1 62 | 61 1995.664 0.000 2.336 0 63 | 62 1995.869 0.000 2.131 0 64 | 63 1995.897 0.000 2.103 0 65 | 64 1995.914 0.000 0.086 0 66 | 65 1995.936 0.000 2.064 0 67 | 66 1995.941 0.000 2.059 0 68 | 67 1996.027 0.000 1.107 1 69 | 68 1996.350 0.000 0.067 0 70 | 69 1996.384 0.000 1.616 0 71 | 70 1996.486 0.000 1.431 0 72 | 71 1996.530 0.000 1.470 0 73 | 72 1996.572 0.000 0.820 1 74 | 73 1997.011 0.000 0.989 0 75 | 74 1997.422 0.000 0.578 0 76 | 75 1997.511 0.000 0.489 0 77 | 76 1997.597 0.000 0.403 0 78 | 77 1997.650 0.000 0.350 0 79 | 78 1997.847 0.000 0.153 0 80 | -------------------------------------------------------------------------------- /lifelines/datasets/CuZn-LeftCensoredDataset.csv: -------------------------------------------------------------------------------- 1 | E,T,group 2 | 0,1.0,alluvial_fan 3 | 0,1.0,alluvial_fan 4 | 0,1.0,alluvial_fan 5 | 0,1.0,alluvial_fan 6 | 1,1.0,alluvial_fan 7 | 1,1.0,alluvial_fan 8 | 1,1.0,alluvial_fan 9 | 1,1.0,alluvial_fan 10 | 1,1.0,alluvial_fan 11 | 1,2.0,alluvial_fan 12 | 1,2.0,alluvial_fan 13 | 1,2.0,alluvial_fan 14 | 1,2.0,alluvial_fan 15 | 1,2.0,alluvial_fan 16 | 1,2.0,alluvial_fan 17 | 1,2.0,alluvial_fan 18 | 1,2.0,alluvial_fan 19 | 1,2.0,alluvial_fan 20 | 1,2.0,alluvial_fan 21 | 1,2.0,alluvial_fan 22 | 1,2.0,alluvial_fan 23 | 1,2.0,alluvial_fan 24 | 1,2.0,alluvial_fan 25 | 1,2.0,alluvial_fan 26 | 1,2.0,alluvial_fan 27 | 1,2.0,alluvial_fan 28 | 1,2.0,alluvial_fan 29 | 1,2.0,alluvial_fan 30 | 1,2.0,alluvial_fan 31 | 1,2.0,alluvial_fan 32 | 1,3.0,alluvial_fan 33 | 1,3.0,alluvial_fan 34 | 1,3.0,alluvial_fan 35 | 1,3.0,alluvial_fan 36 | 1,3.0,alluvial_fan 37 | 1,3.0,alluvial_fan 38 | 1,4.0,alluvial_fan 39 | 1,4.0,alluvial_fan 40 | 1,4.0,alluvial_fan 41 | 0,5.0,alluvial_fan 42 | 0,5.0,alluvial_fan 43 | 0,5.0,alluvial_fan 44 | 0,5.0,alluvial_fan 45 | 0,5.0,alluvial_fan 46 | 0,5.0,alluvial_fan 47 | 0,5.0,alluvial_fan 48 | 0,5.0,alluvial_fan 49 | 1,5.0,alluvial_fan 50 | 1,5.0,alluvial_fan 51 | 1,5.0,alluvial_fan 52 | 1,7.0,alluvial_fan 53 | 1,7.0,alluvial_fan 54 | 1,7.0,alluvial_fan 55 | 1,8.0,alluvial_fan 56 | 1,9.0,alluvial_fan 57 | 0,1.0,basin_trough 58 | 0,1.0,basin_trough 59 | 1,1.0,basin_trough 60 | 1,1.0,basin_trough 61 | 1,1.0,basin_trough 62 | 1,1.0,basin_trough 63 | 1,1.0,basin_trough 64 | 1,1.0,basin_trough 65 | 1,1.0,basin_trough 66 | 0,2.0,basin_trough 67 | 0,2.0,basin_trough 68 | 1,2.0,basin_trough 69 | 1,2.0,basin_trough 70 | 1,2.0,basin_trough 71 | 1,2.0,basin_trough 72 | 1,3.0,basin_trough 73 | 1,3.0,basin_trough 74 | 1,3.0,basin_trough 75 | 1,3.0,basin_trough 76 | 1,3.0,basin_trough 77 | 1,3.0,basin_trough 78 | 1,3.0,basin_trough 79 | 1,3.0,basin_trough 80 | 1,4.0,basin_trough 81 | 1,4.0,basin_trough 82 | 1,4.0,basin_trough 83 | 1,4.0,basin_trough 84 | 1,4.0,basin_trough 85 | 0,5.0,basin_trough 86 | 0,5.0,basin_trough 87 | 0,5.0,basin_trough 88 | 0,5.0,basin_trough 89 | 0,5.0,basin_trough 90 | 1,5.0,basin_trough 91 | 1,6.0,basin_trough 92 | 1,6.0,basin_trough 93 | 1,8.0,basin_trough 94 | 1,9.0,basin_trough 95 | 1,9.0,basin_trough 96 | 0,10.0,basin_trough 97 | 0,10.0,basin_trough 98 | 0,10.0,basin_trough 99 | 0,10.0,basin_trough 100 | 1,12.0,basin_trough 101 | 1,14.0,basin_trough 102 | 0,15.0,basin_trough 103 | 1,15.0,basin_trough 104 | 1,17.0,basin_trough 105 | 1,23.0,basin_trough 106 | 
-------------------------------------------------------------------------------- /lifelines/utils/lowess.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements the Lowess function for nonparametric regression. 3 | Functions: 4 | lowess Fit a smooth nonparametric regression curve to a scatterplot. 5 | For more information, see 6 | William S. Cleveland: "Robust locally weighted regression and smoothing 7 | scatterplots", Journal of the American Statistical Association, December 1979, 8 | volume 74, number 368, pp. 829-836. 9 | William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An 10 | approach to regression analysis by local fitting", Journal of the American 11 | Statistical Association, September 1988, volume 83, number 403, pp. 596-610. 12 | """ 13 | 14 | # Authors: Alexandre Gramfort 15 | # 16 | # License: BSD (3-clause) 17 | 18 | 19 | # Slight updates in lifelines 0.16.0, 2018 20 | 21 | from math import ceil 22 | import numpy as np 23 | from scipy import linalg 24 | 25 | 26 | def lowess(x, y, f=2.0 / 3.0, iterations=3): 27 | """lowess(x, y, f=2./3., iter=3) -> yest 28 | Lowess smoother: Robust locally weighted regression. 29 | The lowess function fits a nonparametric regression curve to a scatterplot. 30 | The arrays x and y contain an equal number of elements; each pair 31 | (x[i], y[i]) defines a data point in the scatterplot. The function returns 32 | the estimated (smooth) values of y. 33 | The smoothing span is given by f. A larger value for f will result in a 34 | smoother curve. The number of robustifying iterations is given by iter. The 35 | function will run faster with a smaller number of iterations. 36 | """ 37 | n = len(x) 38 | r = int(ceil(f * n)) 39 | h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)] 40 | w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0) 41 | w = (1 - w ** 3) ** 3 42 | yest = np.zeros(n) 43 | delta = np.ones(n) 44 | for _ in range(iterations): 45 | for i in range(n): 46 | weights = delta * w[:, i] 47 | b = np.array([np.sum(weights * y), np.sum(weights * y * x)]) 48 | A = np.array([[np.sum(weights), np.sum(weights * x)], [np.sum(weights * x), np.sum(weights * x * x)]]) 49 | # I think it is safe to assume this. 50 | # pylint: disable=unexpected-keyword-arg 51 | beta = linalg.solve(A, b, assume_a="pos", check_finite=False) 52 | yest[i] = beta[0] + beta[1] * x[i] 53 | 54 | residuals = y - yest 55 | s = np.median(np.abs(residuals)) 56 | delta = np.clip(residuals / (6.0 * s), -1, 1) 57 | delta = (1 - delta ** 2) ** 2 58 | 59 | return yest 60 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to lifelines 2 | 3 | 4 | ### Questions about survival analysis? 5 | If you are using lifelines for survival analysis and have a question about "how do I do X?" or "what does Y do?", the best place to ask that is either in our [gitter channel](https://gitter.im/python-lifelines/Lobby) or at [stats.stackexchange.com](https://stats.stackexchange.com/). 6 | 7 | 8 | ### Submitting bugs or other errors observed 9 | 10 | We appreciate all bug reports submitted, as this will help the entire community get a better product. Please open up an issue in the Github Repository. If possible, please provide a code snippet, and what version of lifelines you are using. 
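For example, a minimal snippet for capturing that version string (assuming a standard install, where the top-level package exposes `__version__`):

    import lifelines
    print(lifelines.__version__)  # paste the printed version into the issue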
11 | 12 | 13 | ### Submitting new feature requests 14 | 15 | Please open up an issue in the Github Repository with as much context as possible about the feature you would like to see. It is also useful to link to other libraries/software that have that feature. 16 | 17 | 18 | ### Submitting code, or other changes 19 | 20 | If you are interested in contributing to lifelines (and we thank you for the interest!), we recommend first opening up an issue in the GitHub repository to discuss the changes. From there, we can together plan how to execute the changes. See the Development section below for how to set up a local environment. 21 | 22 | ## Development 23 | 24 | ### Setting up a lifelines development environment 25 | 26 | 1. From the root directory of `lifelines` activate your [virtual environment](https://realpython.com/python-virtual-environments-a-primer/) (if you plan to use one). 27 | 2. Install the development requirements and [`pre-commit`](https://pre-commit.com) hooks. If you are on Mac, Linux, or [Windows `WSL`](https://docs.microsoft.com/en-us/windows/wsl/faq) you can use the provided [`Makefile`](https://github.com/CamDavidsonPilon/lifelines/blob/master/Makefile). Just type `make` into the console and you're ready to start developing. This will also install the dev-requirements. 28 | 29 | ### Formatting 30 | 31 | `lifelines` uses the [`black`](https://github.com/ambv/black) python formatter. 32 | There are 3 different ways to format your code. 33 | 1. Use the [`Makefile`](https://github.com/CamDavidsonPilon/lifelines/blob/master/Makefile). 34 | * `make lint` 35 | 2. Call `black` directly and pass the correct line length. 36 | * `black . -l 120` 37 | 3. Have your code formatted automatically during commit with the `pre-commit` hook. 38 | * stage and commit your unformatted changes: `git commit -m "your_commit_message"` 39 | * Code that needs to be formatted will "fail" the commit hooks and be formatted for you. 40 | * Stage the newly formatted python code: `git add *.py` 41 | * Recall your original commit command and commit again: `git commit -m "your_commit_message"` 42 | 43 | ### Running the tests 44 | 45 | You can optionally run the test suite after install with 46 | 47 | py.test 48 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. lifelines documentation master file, created by 2 | sphinx-quickstart on Sun Feb 2 17:10:21 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: http://i.imgur.com/EOowdSD.png 7 | 8 | ------------------------------------- 9 | 10 | 11 | lifelines 12 | ===================================== 13 | 14 | *lifelines* is an implementation of survival analysis in Python. What 15 | benefits does *lifelines* offer over other survival analysis 16 | implementations? 17 | 18 | - built on top of Pandas 19 | - internal plotting methods 20 | - simple and intuitive API 21 | - only focus is survival analysis 22 | 23 | 24 | Contents: 25 | ============ 26 | 27 | .. toctree:: 28 | :maxdepth: 1 29 | :caption: Quickstart & Intro 30 | 31 | Quickstart 32 | Survival Analysis intro 33 | 34 | ..
toctree:: 35 | :maxdepth: 1 36 | :caption: Univariate Models 37 | 38 | Survival analysis with lifelines 39 | jupyter_notebooks/Piecewise Exponential Models and Creating Custom Models.ipynb 40 | jupyter_notebooks/Modelling time-lagged conversion rates.ipynb 41 | 42 | .. toctree:: 43 | :maxdepth: 1 44 | :caption: Regression Models 45 | 46 | Survival Regression 47 | Time varying survival regression 48 | jupyter_notebooks/Proportional hazard assumption.ipynb 49 | jupyter_notebooks/Cox residuals.ipynb 50 | 51 | .. toctree:: 52 | :maxdepth: 1 53 | :caption: Detailed documentation 54 | 55 | Examples 56 | References 57 | 58 | .. toctree:: 59 | :maxdepth: 1 60 | :caption: About lifelines 61 | 62 | Changelog 63 | Citing lifelines 64 | 65 | .. toctree:: 66 | :maxdepth: 1 67 | :caption: Questions? Suggestions? 68 | 69 | Gitter channel 70 | Create a GitHub issue 71 | Development blog 72 | 73 | Installation 74 | ------------------------------ 75 | 76 | 77 | .. code-block:: console 78 | 79 | pip install lifelines 80 | 81 | 82 | Source code and issue tracker 83 | ------------------------------ 84 | 85 | Available on Github, `CamDavidsonPilon/lifelines `_. 86 | Please report bugs, issues and feature extensions there. We also have `Gitter channel `_ available to discuss survival analysis and *lifelines*: 87 | 88 | Citing *lifelines* 89 | ------------------------------ 90 | 91 | The following link will bring you to a page where you can find the latest citation for *lifelines*: 92 | 93 | `Citation for lifelines `_ 94 | 95 | 96 | Indices and tables 97 | ================== 98 | 99 | * :ref:`genindex` 100 | * :ref:`modindex` 101 | * :ref:`search` 102 | -------------------------------------------------------------------------------- /tests/utils/test_concordance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from lifelines import CoxPHFitter 8 | from lifelines.datasets import load_rossi 9 | 10 | from lifelines.utils.concordance import concordance_index 11 | from lifelines.utils.concordance import concordance_index as fast_cindex 12 | from lifelines.utils.concordance import naive_concordance_index as slow_cindex 13 | 14 | 15 | def test_concordance_index_returns_same_after_shifting(): 16 | T = np.array([1, 2, 3, 4, 5, 6]) 17 | T_ = np.array([2, 1, 4, 6, 5, 3]) 18 | assert ( 19 | concordance_index(T, T_) 20 | == concordance_index(T - 5, T_ - 5) 21 | == concordance_index(T, T_ - 5) 22 | == concordance_index(T - 5, T_) 23 | ) 24 | 25 | 26 | def test_both_concordance_index_function_deal_with_ties_the_same_way(): 27 | actual_times = np.array([1, 1, 2]) 28 | predicted_times = np.array([1, 2, 3]) 29 | obs = np.ones(3) 30 | assert fast_cindex(actual_times, predicted_times, obs) == slow_cindex(actual_times, predicted_times, obs) == 1.0 31 | 32 | 33 | def test_both_concordance_index_with_only_censoring_fails_gracefully(): 34 | actual_times = np.array([1, 2, 3]) 35 | predicted_times = np.array([1, 2, 3]) 36 | obs = np.zeros(3) 37 | with pytest.raises(ZeroDivisionError, match="admissable pairs"): 38 | fast_cindex(actual_times, predicted_times, obs) 39 | 40 | with pytest.raises(ZeroDivisionError, match="admissable pairs"): 41 | slow_cindex(actual_times, predicted_times, obs) 42 | 43 | 44 | def test_concordance_index_function_exits(): 45 | N = 10 * 1000 46 | actual_times = np.random.exponential(1, size=N) 47 | predicted_times = np.random.exponential(1, size=N) 48 | obs = np.ones(N) 49 | 
assert fast_cindex(actual_times, predicted_times, obs) 50 | 51 | 52 | def test_concordance_index_will_not_overflow(): 53 | a = np.arange(65536) 54 | assert concordance_index(a, a) == 1.0 55 | b = np.arange(65537) 56 | assert concordance_index(b, b) == 1.0 57 | assert concordance_index(b, b[::-1]) == 0.0 58 | 59 | 60 | def test_concordance_index_fast_is_same_as_slow(): 61 | size = 100 62 | T = np.random.normal(size=size) 63 | P = np.random.normal(size=size) 64 | C = np.random.choice([0, 1], size=size) 65 | Z = np.zeros_like(T) 66 | 67 | # Hard to imagine these failing 68 | assert slow_cindex(T, Z, C) == fast_cindex(T, Z, C) 69 | assert slow_cindex(T, T, C) == fast_cindex(T, T, C) 70 | # This is the real test though 71 | assert slow_cindex(T, P, C) == fast_cindex(T, P, C) 72 | 73 | cp = CoxPHFitter() 74 | df = load_rossi() 75 | cp.fit(df, duration_col="week", event_col="arrest") 76 | 77 | T = cp.durations.values.ravel() 78 | P = -cp.predict_partial_hazard(df[df.columns.difference(["week", "arrest"])]).values.ravel() 79 | 80 | E = cp.event_observed.values.ravel() 81 | 82 | assert slow_cindex(T, P, E) == fast_cindex(T, P, E) 83 | -------------------------------------------------------------------------------- /lifelines/datasets/dfcv_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pandas as pd 3 | from lifelines.utils import add_covariate_to_timeline 4 | from lifelines.utils import to_long_format 5 | 6 | df = pd.DataFrame( 7 | [ 8 | [1, 3, True, 1], 9 | [6, 4, False, 0], 10 | [3, 5, True, 1], 11 | [2, 5, False, 1], 12 | [4, 6, True, 1], 13 | [7, 7, True, 0], 14 | [8, 8, False, 0], 15 | [5, 8, False, 1], 16 | [9, 9, True, 0], 17 | [10, 10, True, 0], 18 | ], 19 | columns=["id", "time", "event", "group"], 20 | ) 21 | 22 | 23 | df = to_long_format(df, "time") 24 | 25 | cv = pd.DataFrame.from_records( 26 | [ 27 | {"id": 1, "z": 0, "time": 0}, 28 | {"id": 6, "z": 1, "time": 0}, 29 | {"id": 3, "z": 1, "time": 0}, 30 | {"id": 2, "z": 0, "time": 0}, 31 | {"id": 4, "z": 0, "time": 0}, 32 | {"id": 7, "z": 0, "time": 0}, 33 | {"id": 8, "z": 0, "time": 0}, 34 | {"id": 5, "z": 0, "time": 0}, 35 | {"id": 9, "z": 0, "time": 0}, 36 | {"id": 10, "z": 0, "time": 0}, 37 | {"id": 1, "z": 0, "time": 3}, 38 | {"id": 6, "z": 1, "time": 3}, 39 | {"id": 3, "z": 1, "time": 3}, 40 | {"id": 2, "z": 0, "time": 3}, 41 | {"id": 4, "z": 0, "time": 3}, 42 | {"id": 7, "z": 0, "time": 3}, 43 | {"id": 8, "z": 0, "time": 3}, 44 | {"id": 5, "z": 0, "time": 3}, 45 | {"id": 9, "z": 0, "time": 3}, 46 | {"id": 10, "z": 1, "time": 3}, 47 | {"id": 6, "z": 1, "time": 4}, 48 | {"id": 3, "z": 1, "time": 4}, 49 | {"id": 2, "z": 0, "time": 4}, 50 | {"id": 4, "z": 0, "time": 4}, 51 | {"id": 7, "z": 0, "time": 4}, 52 | {"id": 8, "z": 0, "time": 4}, 53 | {"id": 5, "z": 0, "time": 4}, 54 | {"id": 9, "z": 0, "time": 4}, 55 | {"id": 10, "z": 1, "time": 4}, 56 | {"id": 3, "z": 1, "time": 5}, 57 | {"id": 2, "z": 0, "time": 5}, 58 | {"id": 4, "z": 0, "time": 5}, 59 | {"id": 7, "z": 1, "time": 5}, 60 | {"id": 8, "z": 0, "time": 5}, 61 | {"id": 5, "z": 0, "time": 5}, 62 | {"id": 9, "z": 1, "time": 5}, 63 | {"id": 10, "z": 1, "time": 5}, 64 | {"id": 4, "z": 0, "time": 6}, 65 | {"id": 7, "z": 1, "time": 6}, 66 | {"id": 8, "z": 0, "time": 6}, 67 | {"id": 5, "z": 1, "time": 6}, 68 | {"id": 9, "z": 1, "time": 6}, 69 | {"id": 10, "z": 1, "time": 6}, 70 | {"id": 7, "z": 1, "time": 7}, 71 | {"id": 8, "z": 0, "time": 7}, 72 | {"id": 5, "z": 1, "time": 7}, 73 | {"id": 9, "z": 1, 
"time": 7}, 74 | {"id": 10, "z": 1, "time": 7}, 75 | {"id": 8, "z": 0, "time": 8}, 76 | {"id": 5, "z": 1, "time": 8}, 77 | {"id": 9, "z": 1, "time": 8}, 78 | {"id": 10, "z": 1, "time": 8}, 79 | {"id": 9, "z": 1, "time": 9}, 80 | {"id": 10, "z": 1, "time": 9}, 81 | ] 82 | ) 83 | 84 | dfcv = add_covariate_to_timeline(df, cv, "id", "time", "event", add_enum=False) 85 | -------------------------------------------------------------------------------- /lifelines/fitters/exponential_fitter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | import numpy as np 5 | from lifelines.fitters import KnownModelParametericUnivariateFitter 6 | 7 | 8 | class ExponentialFitter(KnownModelParametericUnivariateFitter): 9 | r""" 10 | This class implements an Exponential model for univariate data. The model has parameterized 11 | form: 12 | 13 | .. math:: S(t) = \exp\left(\frac{-t}{\lambda}\right), \lambda >0 14 | 15 | which implies the cumulative hazard rate is 16 | 17 | .. math:: H(t) = \frac{t}{\lambda} 18 | 19 | and the hazard rate is: 20 | 21 | .. math:: h(t) = \frac{1}{\lambda} 22 | 23 | After calling the `.fit` method, you have access to properties like: ``survival_function_``, ``lambda_``, ``cumulative_hazard_`` 24 | A summary of the fit is available with the method ``print_summary()`` 25 | 26 | Parameters 27 | ----------- 28 | alpha: float, optional (default=0.05) 29 | the level in the confidence intervals. 30 | 31 | Important 32 | ---------- 33 | The parameterization of this model changed in lifelines 0.19.0. Previously, the cumulative hazard looked like 34 | :math:`\lambda t`. The parameterization is now the reciprocal of :math:`\lambda`. 35 | 36 | Attributes 37 | ---------- 38 | cumulative_hazard_ : DataFrame 39 | The estimated cumulative hazard (with custom timeline if provided) 40 | confidence_interval_cumulative_hazard_ : DataFrame 41 | The lower and upper confidence intervals for the cumulative hazard 42 | hazard_ : DataFrame 43 | The estimated hazard (with custom timeline if provided) 44 | confidence_interval_hazard_ : DataFrame 45 | The lower and upper confidence intervals for the hazard 46 | survival_function_ : DataFrame 47 | The estimated survival function (with custom timeline if provided) 48 | confidence_interval_survival_function_ : DataFrame 49 | The lower and upper confidence intervals for the survival function 50 | variance_matrix_ : numpy array 51 | The variance matrix of the coefficients 52 | median_: float 53 | The median time to event 54 | lambda_: float 55 | The fitted parameter in the model 56 | durations: array 57 | The durations provided 58 | event_observed: array 59 | The event_observed variable provided 60 | timeline: array 61 | The time line to use for plotting and indexing 62 | entry: array or None 63 | The entry array provided, or None 64 | cumumlative_density_ : DataFrame 65 | The estimated cumulative density function (with custom timeline if provided) 66 | confidence_interval_cumumlative_density_ : DataFrame 67 | The lower and upper confidence intervals for the cumulative density 68 | """ 69 | 70 | _fitted_parameter_names = ["lambda_"] 71 | 72 | @property 73 | def median_(self): 74 | return np.log(2) / self.lambda_ 75 | 76 | def _cumulative_hazard(self, params, times): 77 | lambda_ = params[0] 78 | return times / lambda_ 79 | -------------------------------------------------------------------------------- /lifelines/datasets/waltons_dataset.csv: 
-------------------------------------------------------------------------------- 1 | T,E,group 2 | 6.0,1,miR-137 3 | 13.0,1,miR-137 4 | 13.0,1,miR-137 5 | 13.0,1,miR-137 6 | 19.0,1,miR-137 7 | 19.0,1,miR-137 8 | 19.0,1,miR-137 9 | 26.0,1,miR-137 10 | 26.0,1,miR-137 11 | 26.0,1,miR-137 12 | 26.0,1,miR-137 13 | 26.0,1,miR-137 14 | 33.0,1,miR-137 15 | 33.0,1,miR-137 16 | 47.0,1,miR-137 17 | 62.0,1,miR-137 18 | 62.0,1,miR-137 19 | 9.0,1,miR-137 20 | 9.0,1,miR-137 21 | 9.0,1,miR-137 22 | 15.0,1,miR-137 23 | 15.0,1,miR-137 24 | 22.0,1,miR-137 25 | 22.0,1,miR-137 26 | 22.0,1,miR-137 27 | 22.0,1,miR-137 28 | 29.0,1,miR-137 29 | 29.0,1,miR-137 30 | 29.0,1,miR-137 31 | 29.0,1,miR-137 32 | 29.0,1,miR-137 33 | 36.0,1,miR-137 34 | 36.0,1,miR-137 35 | 43.0,1,miR-137 36 | 33.0,1,control 37 | 54.0,1,control 38 | 54.0,1,control 39 | 61.0,1,control 40 | 61.0,1,control 41 | 61.0,0,control 42 | 61.0,1,control 43 | 61.0,0,control 44 | 61.0,1,control 45 | 61.0,1,control 46 | 61.0,1,control 47 | 61.0,1,control 48 | 61.0,1,control 49 | 61.0,1,control 50 | 69.0,1,control 51 | 69.0,1,control 52 | 69.0,1,control 53 | 69.0,1,control 54 | 69.0,1,control 55 | 69.0,1,control 56 | 69.0,0,control 57 | 69.0,1,control 58 | 69.0,1,control 59 | 69.0,1,control 60 | 69.0,1,control 61 | 32.0,1,control 62 | 53.0,1,control 63 | 53.0,1,control 64 | 60.0,1,control 65 | 60.0,1,control 66 | 60.0,1,control 67 | 60.0,1,control 68 | 60.0,1,control 69 | 68.0,1,control 70 | 68.0,1,control 71 | 68.0,1,control 72 | 68.0,1,control 73 | 68.0,0,control 74 | 68.0,1,control 75 | 68.0,1,control 76 | 68.0,1,control 77 | 68.0,1,control 78 | 68.0,1,control 79 | 75.0,1,control 80 | 17.0,1,control 81 | 51.0,1,control 82 | 51.0,1,control 83 | 51.0,1,control 84 | 58.0,1,control 85 | 58.0,1,control 86 | 58.0,1,control 87 | 58.0,1,control 88 | 66.0,1,control 89 | 66.0,1,control 90 | 7.0,1,control 91 | 7.0,0,control 92 | 41.0,1,control 93 | 41.0,1,control 94 | 41.0,1,control 95 | 41.0,1,control 96 | 41.0,1,control 97 | 41.0,1,control 98 | 41.0,1,control 99 | 48.0,1,control 100 | 48.0,1,control 101 | 48.0,1,control 102 | 48.0,1,control 103 | 48.0,1,control 104 | 48.0,1,control 105 | 48.0,1,control 106 | 48.0,1,control 107 | 56.0,1,control 108 | 56.0,1,control 109 | 56.0,1,control 110 | 56.0,1,control 111 | 56.0,1,control 112 | 56.0,1,control 113 | 56.0,1,control 114 | 56.0,1,control 115 | 56.0,1,control 116 | 56.0,1,control 117 | 56.0,1,control 118 | 56.0,1,control 119 | 56.0,1,control 120 | 56.0,1,control 121 | 56.0,1,control 122 | 56.0,1,control 123 | 56.0,1,control 124 | 56.0,1,control 125 | 63.0,1,control 126 | 63.0,1,control 127 | 63.0,1,control 128 | 63.0,1,control 129 | 63.0,1,control 130 | 63.0,1,control 131 | 63.0,1,control 132 | 63.0,1,control 133 | 63.0,1,control 134 | 69.0,1,control 135 | 69.0,1,control 136 | 38.0,1,control 137 | 38.0,1,control 138 | 45.0,1,control 139 | 45.0,1,control 140 | 45.0,1,control 141 | 45.0,1,control 142 | 45.0,1,control 143 | 45.0,1,control 144 | 45.0,1,control 145 | 45.0,1,control 146 | 45.0,0,control 147 | 45.0,1,control 148 | 53.0,1,control 149 | 53.0,1,control 150 | 53.0,1,control 151 | 53.0,1,control 152 | 53.0,1,control 153 | 60.0,1,control 154 | 60.0,0,control 155 | 60.0,1,control 156 | 60.0,1,control 157 | 60.0,1,control 158 | 60.0,1,control 159 | 60.0,1,control 160 | 60.0,1,control 161 | 60.0,1,control 162 | 60.0,1,control 163 | 60.0,1,control 164 | 66.0,1,control 165 | -------------------------------------------------------------------------------- /lifelines/utils/gamma.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2018 Better 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | Edited in 2019, Cameron Davidson-Pilon 25 | """ 26 | from autograd.extend import primitive, defvjp 27 | from autograd.numpy.numpy_vjps import unbroadcast_f # This is not documented 28 | from scipy.special import gammainc as gammainc_orig 29 | 30 | 31 | @primitive 32 | def gammainc(k, x): 33 | """ Lower regularized incomplete gamma function. 34 | We rely on `scipy.special.gammainc 35 | `_ 36 | for this. However, there is a number of issues using this function 37 | together with `autograd `_: 38 | 1. It doesn't let you take the gradient with respect to k 39 | 2. The gradient with respect to x is really slow 40 | As a really stupid workaround, because we don't need the numbers to 41 | be 100% exact, we just approximate the gradient. 42 | Side note 1: if you truly want to compute the correct derivative, see the 43 | `Wikipedia articule about the Incomplete gamma function 44 | `_ 45 | where the T(3, s, x) function can be implemented as 46 | .. code-block:: python 47 | def T3(s, x): 48 | return mpmath.meijerg(a_s=([], [0, 0]), b_s=([s-1, -1, -1], []), z=x) 49 | I wasted a few hours on this but sadly it turns out to be extremely slow. 
50 | Side note 2: TensorFlow actually has a `similar bug 51 | `_ 52 | """ 53 | return gammainc_orig(k, x) 54 | 55 | 56 | @primitive 57 | def gammainc2(k, x): 58 | return gammainc_orig(k, x) 59 | 60 | 61 | G_EPS = 1e-8 62 | 63 | defvjp( 64 | gammainc2, 65 | lambda ans, k, x: unbroadcast_f( 66 | k, lambda g: g * (gammainc_orig(k + G_EPS, x) - 2 * ans + gammainc_orig(k - G_EPS, x)) / G_EPS ** 2 67 | ), 68 | lambda ans, k, x: unbroadcast_f( 69 | x, lambda g: g * (gammainc_orig(k, x + G_EPS) - 2 * ans + gammainc_orig(k, x - G_EPS)) / G_EPS ** 2 70 | ), 71 | ) 72 | 73 | defvjp( 74 | gammainc, 75 | lambda ans, k, x: unbroadcast_f(k, lambda g: g * (gammainc2(k + G_EPS, x) - ans) / G_EPS), 76 | lambda ans, k, x: unbroadcast_f(x, lambda g: g * (gammainc2(k, x + G_EPS) - ans) / G_EPS), 77 | ) 78 | -------------------------------------------------------------------------------- /lifelines/fitters/log_normal_fitter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | import autograd.numpy as np 5 | from autograd.scipy.stats import norm 6 | from lifelines.fitters import KnownModelParametericUnivariateFitter 7 | from lifelines.utils.logsf import logsf 8 | 9 | 10 | class LogNormalFitter(KnownModelParametericUnivariateFitter): 11 | r""" 12 | This class implements a Log Normal model for univariate data. The model has parameterized 13 | form: 14 | 15 | .. math:: S(t) = 1 - \Phi((\log(t) - \mu)/\sigma), \sigma >0 16 | 17 | where :math:`\Phi` is the CDF of a standard normal random variable. 18 | This implies the cumulative hazard rate is 19 | 20 | .. math:: H(t) = -\log(1 - \Phi((\log(t) - \mu)/\sigma)) 21 | 22 | After calling the `.fit` method, you have access to properties like: ``survival_function_``, ``mu_``, ``sigma_``. 23 | A summary of the fit is available with the method ``print_summary()``. 24 | 25 | Parameters 26 | ----------- 27 | alpha: float, optional (default=0.05) 28 | the level in the confidence intervals.
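(A usage sketch mirroring the Examples sections of the Weibull and log-logistic fitters elsewhere in this package; it assumes the same ``fit``/``plot`` API and the Waltons dataset those examples use.)

Examples
--------

>>> from lifelines import LogNormalFitter
>>> from lifelines.datasets import load_waltons
>>> waltons = load_waltons()
>>> lnf = LogNormalFitter()
>>> lnf.fit(waltons['T'], waltons['E'])
>>> lnf.plot()
>>> print(lnf.mu_)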
29 | 30 | 31 | Attributes 32 | ---------- 33 | cumulative_hazard_ : DataFrame 34 | The estimated cumulative hazard (with custom timeline if provided) 35 | confidence_interval_cumulative_hazard_ : DataFrame 36 | The lower and upper confidence intervals for the cumulative hazard 37 | hazard_ : DataFrame 38 | The estimated hazard (with custom timeline if provided) 39 | confidence_interval_hazard_ : DataFrame 40 | The lower and upper confidence intervals for the hazard 41 | survival_function_ : DataFrame 42 | The estimated survival function (with custom timeline if provided) 43 | confidence_interval_survival_function_ : DataFrame 44 | The lower and upper confidence intervals for the survival function 45 | cumumlative_density_ : DataFrame 46 | The estimated cumulative density function (with custom timeline if provided) 47 | confidence_interval_cumumlative_density_ : DataFrame 48 | The lower and upper confidence intervals for the cumulative density 49 | variance_matrix_ : numpy array 50 | The variance matrix of the coefficients 51 | median_: float 52 | The median time to event 53 | mu_: float 54 | The fitted parameter in the model 55 | sigma_: float 56 | The fitted parameter in the model 57 | durations: array 58 | The durations provided 59 | event_observed: array 60 | The event_observed variable provided 61 | timeline: array 62 | The time line to use for plotting and indexing 63 | entry: array or None 64 | The entry array provided, or None 65 | """ 66 | 67 | _fitted_parameter_names = ["mu_", "sigma_"] 68 | _bounds = [(None, None), (0, None)] 69 | 70 | @property 71 | def median_(self): 72 | return np.exp(self.mu_) 73 | 74 | def _cumulative_hazard(self, params, times): 75 | mu_, sigma_ = params 76 | Z = (np.log(times) - mu_) / sigma_ 77 | return -logsf(Z) 78 | 79 | def _log_hazard(self, params, times): 80 | mu_, sigma_ = params 81 | Z = (np.log(times) - mu_) / sigma_ 82 | return norm.logpdf(Z, loc=0, scale=1) - np.log(sigma_) - np.log(times) - logsf(Z) 83 | 84 | def _log_1m_sf(self, params, times): 85 | mu_, sigma_ = params 86 | Z = (np.log(times) - mu_) / sigma_ 87 | return norm.logcdf(Z, loc=0, scale=1) 88 | -------------------------------------------------------------------------------- /docs/lifelines.fitters.rst: -------------------------------------------------------------------------------- 1 | lifelines.fitters 2 | ================= 3 | 4 | 5 | lifelines.fitters.aalen\_additive\_fitter module 6 | ------------------------------------------------ 7 | 8 | .. automodule:: lifelines.fitters.aalen_additive_fitter 9 | :members: 10 | :undoc-members: 11 | 12 | lifelines.fitters.aalen\_johansen\_fitter module 13 | ------------------------------------------------ 14 | 15 | .. automodule:: lifelines.fitters.aalen_johansen_fitter 16 | :members: 17 | :undoc-members: 18 | 19 | lifelines.fitters.breslow\_fleming\_harrington\_fitter module 20 | ------------------------------------------------------------- 21 | 22 | .. automodule:: lifelines.fitters.breslow_fleming_harrington_fitter 23 | :members: 24 | :undoc-members: 25 | 26 | lifelines.fitters.cox\_time\_varying\_fitter module 27 | --------------------------------------------------- 28 | 29 | .. automodule:: lifelines.fitters.cox_time_varying_fitter 30 | :members: 31 | :undoc-members: 32 | 33 | lifelines.fitters.coxph\_fitter module 34 | -------------------------------------- 35 | 36 | .. 
automodule:: lifelines.fitters.coxph_fitter 37 | :members: 38 | :undoc-members: 39 | 40 | lifelines.fitters.exponential\_fitter module 41 | -------------------------------------------- 42 | 43 | .. automodule:: lifelines.fitters.exponential_fitter 44 | :members: 45 | :undoc-members: 46 | 47 | lifelines.fitters.kaplan\_meier\_fitter module 48 | ---------------------------------------------- 49 | 50 | .. automodule:: lifelines.fitters.kaplan_meier_fitter 51 | :members: 52 | :undoc-members: 53 | 54 | lifelines.fitters.log\_logistic\_fitter module 55 | ---------------------------------------------- 56 | 57 | .. automodule:: lifelines.fitters.log_logistic_fitter 58 | :members: 59 | :undoc-members: 60 | 61 | lifelines.fitters.log\_normal\_fitter module 62 | -------------------------------------------- 63 | 64 | .. automodule:: lifelines.fitters.log_normal_fitter 65 | :members: 66 | :undoc-members: 67 | 68 | lifelines.fitters.nelson\_aalen\_fitter module 69 | ---------------------------------------------- 70 | 71 | .. automodule:: lifelines.fitters.nelson_aalen_fitter 72 | :members: 73 | :undoc-members: 74 | 75 | lifelines.fitters.piecewise\_exponential\_fitter module 76 | ------------------------------------------------------- 77 | 78 | .. automodule:: lifelines.fitters.piecewise_exponential_fitter 79 | :members: 80 | :undoc-members: 81 | 82 | 83 | 84 | lifelines.fitters.weibull\_fitter module 85 | ------------------------------------------ 86 | 87 | .. automodule:: lifelines.fitters.weibull_fitter 88 | :members: 89 | :undoc-members: 90 | 91 | 92 | lifelines.fitters.weibull\_aft\_fitter module 93 | ------------------------------------------------- 94 | 95 | .. automodule:: lifelines.fitters.weibull_aft_fitter 96 | :members: 97 | :undoc-members: 98 | 99 | lifelines.fitters.log\_normal\_aft\_fitter module 100 | --------------------------------------------------- 101 | 102 | .. automodule:: lifelines.fitters.log_normal_aft_fitter 103 | :members: 104 | :undoc-members: 105 | 106 | lifelines.fitters.log\_logistic\_aft\_fitter module 107 | ----------------------------------------------------- 108 | 109 | .. automodule:: lifelines.fitters.log_logistic_aft_fitter 110 | :members: 111 | :undoc-members: 112 | 113 | 114 | 115 | .. automodule:: lifelines.fitters 116 | :members: 117 | :undoc-members: 118 | -------------------------------------------------------------------------------- /lifelines/fitters/log_logistic_fitter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import autograd.numpy as np 3 | 4 | from lifelines.fitters import KnownModelParametericUnivariateFitter 5 | 6 | 7 | class LogLogisticFitter(KnownModelParametericUnivariateFitter): 8 | 9 | r""" 10 | This class implements a Log-Logistic model for univariate data. The model has parameterized 11 | form: 12 | 13 | .. math:: S(t) = \left(1 + \left(\frac{t}{\alpha}\right)^{\beta}\right)^{-1}, \alpha > 0, \beta > 0, 14 | 15 | and the hazard rate is: 16 | 17 | .. math:: h(t) = \frac{\left(\frac{\beta}{\alpha}\right)\left(\frac{t}{\alpha}\right) ^ {\beta-1}}{\left(1 + \left(\frac{t}{\alpha}\right)^{\beta}\right)} 18 | 19 | and the cumulative hazard is: 20 | 21 | .. math:: H(t) = \log\left(\left(\frac{t}{\alpha}\right) ^ {\beta} + 1\right) 22 | 23 | After calling the `.fit` method, you have access to properties like: ``cumulative_hazard_``, ``plot``, ``survival_function_``, ``alpha_`` and ``beta_``. 
24 | A summary of the fit is available with the method 'print_summary()' 25 | 26 | Parameters 27 | ----------- 28 | alpha: float, optional (default=0.05) 29 | the level in the confidence intervals. 30 | 31 | Examples 32 | -------- 33 | 34 | >>> from lifelines import LogLogisticFitter 35 | >>> from lifelines.datasets import load_waltons 36 | >>> waltons = load_waltons() 37 | >>> llf = LogLogisticFitter() 38 | >>> llf.fit(waltons['T'], waltons['E']) 39 | >>> llf.plot() 40 | >>> print(llf.alpha_) 41 | 42 | Attributes 43 | ---------- 44 | cumulative_hazard_ : DataFrame 45 | The estimated cumulative hazard (with custom timeline if provided) 46 | confidence_interval_cumulative_hazard_ : DataFrame 47 | The lower and upper confidence intervals for the cumulative hazard 48 | hazard_ : DataFrame 49 | The estimated hazard (with custom timeline if provided) 50 | confidence_interval_hazard_ : DataFrame 51 | The lower and upper confidence intervals for the hazard 52 | survival_function_ : DataFrame 53 | The estimated survival function (with custom timeline if provided) 54 | confidence_interval_survival_function_ : DataFrame 55 | The lower and upper confidence intervals for the survival function 56 | cumumlative_density_ : DataFrame 57 | The estimated cumulative density function (with custom timeline if provided) 58 | confidence_interval_cumumlative_density_ : DataFrame 59 | The lower and upper confidence intervals for the cumulative density 60 | variance_matrix_ : numpy array 61 | The variance matrix of the coefficients 62 | median_: float 63 | The median time to event 64 | alpha_: float 65 | The fitted parameter in the model 66 | beta_: float 67 | The fitted parameter in the model 68 | durations: array 69 | The durations provided 70 | event_observed: array 71 | The event_observed variable provided 72 | timeline: array 73 | The time line to use for plotting and indexing 74 | entry: array or None 75 | The entry array provided, or None 76 | """ 77 | _fitted_parameter_names = ["alpha_", "beta_"] 78 | 79 | @property 80 | def median_(self): 81 | return self.alpha_ 82 | 83 | def _cumulative_hazard(self, params, times): 84 | alpha_, beta_ = params 85 | return np.log1p((times / alpha_) ** beta_) 86 | 87 | def _log_1m_sf(self, params, times): 88 | alpha_, beta_ = params 89 | return -np.log1p((times / alpha_) ** -beta_) 90 | -------------------------------------------------------------------------------- /lifelines/fitters/weibull_fitter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import autograd.numpy as np 3 | 4 | from lifelines.fitters import KnownModelParametericUnivariateFitter 5 | 6 | 7 | class WeibullFitter(KnownModelParametericUnivariateFitter): 8 | 9 | r""" 10 | 11 | This class implements a Weibull model for univariate data. The model has parameterized 12 | form: 13 | 14 | .. math:: S(t) = \exp\left(-\left(\frac{t}{\lambda}\right)^\rho\right), \lambda > 0, \rho > 0, 15 | 16 | which implies the cumulative hazard rate is 17 | 18 | .. math:: H(t) = \left(\frac{t}{\lambda}\right)^\rho, 19 | 20 | and the hazard rate is: 21 | 22 | .. math:: h(t) = \frac{\rho}{\lambda}\left(\frac{t}{\lambda}\right)^{\rho-1} 23 | 24 | After calling the `.fit` method, you have access to properties like: ``cumulative_hazard_``, ``survival_function_``, ``lambda_`` and ``rho_``. 25 | A summary of the fit is available with the method ``print_summary()``. 
26 | 27 | Parameters 28 | ----------- 29 | alpha: float, optional (default=0.05) 30 | the level in the confidence intervals. 31 | 32 | Important 33 | ---------- 34 | The parameterization of this model changed in lifelines 0.19.0. Previously, the cumulative hazard looked like 35 | :math:`(\lambda t)^\rho`. The parameterization is now the reciprocal of :math:`\lambda`. 36 | 37 | Examples 38 | -------- 39 | 40 | >>> from lifelines import WeibullFitter 41 | >>> from lifelines.datasets import load_waltons 42 | >>> waltons = load_waltons() 43 | >>> wbf = WeibullFitter() 44 | >>> wbf.fit(waltons['T'], waltons['E']) 45 | >>> wbf.plot() 46 | >>> print(wbf.lambda_) 47 | 48 | Attributes 49 | ---------- 50 | cumulative_hazard_ : DataFrame 51 | The estimated cumulative hazard (with custom timeline if provided) 52 | confidence_interval_cumulative_hazard_ : DataFrame 53 | The lower and upper confidence intervals for the cumulative hazard 54 | hazard_ : DataFrame 55 | The estimated hazard (with custom timeline if provided) 56 | confidence_interval_hazard_ : DataFrame 57 | The lower and upper confidence intervals for the hazard 58 | survival_function_ : DataFrame 59 | The estimated survival function (with custom timeline if provided) 60 | confidence_interval_survival_function_ : DataFrame 61 | The lower and upper confidence intervals for the survival function 62 | cumumlative_density_ : DataFrame 63 | The estimated cumulative density function (with custom timeline if provided) 64 | confidence_interval_cumumlative_density_ : DataFrame 65 | The lower and upper confidence intervals for the cumulative density 66 | variance_matrix_ : numpy array 67 | The variance matrix of the coefficients 68 | median_: float 69 | The median time to event 70 | lambda_: float 71 | The fitted parameter in the model 72 | rho_: float 73 | The fitted parameter in the model 74 | durations: array 75 | The durations provided 76 | event_observed: array 77 | The event_observed variable provided 78 | timeline: array 79 | The time line to use for plotting and indexing 80 | entry: array or None 81 | The entry array provided, or None 82 | """ 83 | 84 | _fitted_parameter_names = ["lambda_", "rho_"] 85 | 86 | def _cumulative_hazard(self, params, times): 87 | lambda_, rho_ = params 88 | return (times / lambda_) ** rho_ 89 | 90 | @property 91 | def median_(self): 92 | return self.lambda_ * (np.log(2) ** (1.0 / self.rho_)) 93 | -------------------------------------------------------------------------------- /lifelines/fitters/breslow_fleming_harrington_fitter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from lifelines.fitters import UnivariateFitter 8 | from lifelines import NelsonAalenFitter 9 | from lifelines.utils import _to_array, coalesce, CensoringType 10 | 11 | 12 | class BreslowFlemingHarringtonFitter(UnivariateFitter): 13 | 14 | """ 15 | Class for fitting the Breslow-Fleming-Harrington estimate for the survival function. This estimator 16 | is a biased estimator of the survival function, but it is more stable when the population is small and 17 | there are too few early truncation times: it may happen that the number of patients at risk and 18 | the number of deaths are the same. 19 | 20 | Mathematically, the NAF estimator is the negative logarithm of the BFH estimator.
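Written out in the ``.. math::`` notation the other fitter docstrings use (a restatement of the sentence above; :math:`\hat{H}_{NA}` denotes the Nelson-Aalen cumulative hazard estimate and :math:`\hat{S}_{BFH}` the BFH survival estimate — symbols chosen here for illustration):

.. math:: \hat{S}_{BFH}(t) = \exp\left(-\hat{H}_{NA}(t)\right) \quad\Longleftrightarrow\quad \hat{H}_{NA}(t) = -\log \hat{S}_{BFH}(t)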
21 | 22 | BreslowFlemingHarringtonFitter(alpha=0.05) 23 | 24 | Parameters 25 | ---------- 26 | alpha: float, optional (default=0.05) 27 | The alpha value associated with the confidence intervals. 28 | 29 | """ 30 | 31 | def fit( 32 | self, 33 | durations, 34 | event_observed=None, 35 | timeline=None, 36 | entry=None, 37 | label="BFH_estimate", 38 | alpha=None, 39 | ci_labels=None, 40 | ): # pylint: disable=too-many-arguments 41 | """ 42 | Parameters 43 | ---------- 44 | durations: an array, or pd.Series, of length n 45 | duration subject was observed for 46 | timeline: 47 | return the best estimate at the values in timeline (positively increasing) 48 | event_observed: an array, or pd.Series, of length n 49 | True if the death was observed, False if the event was lost (right-censored). Defaults to all True if event_observed is None 50 | entry: an array, or pd.Series, of length n 51 | relative time when a subject entered the study. This is 52 | useful for left-truncated observations, i.e. the birth event was not observed. 53 | If None, defaults to all 0 (all birth events observed). 54 | label: string 55 | a string to name the column of the estimate. 56 | alpha: float, optional (default=0.05) 57 | the alpha value in the confidence intervals. Overrides the initializing 58 | alpha for this call to fit only. 59 | ci_labels: iterable 60 | add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: