├── docs ├── README.md ├── authors.md ├── license.md ├── CHANGELOG.md ├── history.md ├── contributing.md ├── .gitignore ├── usage.md ├── Makefile ├── api.rst ├── make.bat ├── quickstart.md ├── installation.md ├── index.rst └── conf.py ├── .envrc ├── CHANGELOG.md ├── requirements_dev.txt ├── AUTHORS.md ├── PULL_REQUEST_TEMPLATE.md ├── requirements.txt ├── medicare_utils ├── __init__.py ├── metadata │ ├── xw │ │ ├── to_json.py │ │ └── snfr.json │ └── codebook │ │ ├── bsfab.json │ │ └── med.json ├── codebook.py ├── utils.py └── parquet.py ├── tox.ini ├── readthedocs.yml ├── MANIFEST.in ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── tests ├── test_codebook.py ├── test_medicare_df_with_data.py └── test_medicare_df.py ├── environment.yml ├── LICENSE ├── .gitignore ├── setup.py ├── README.md ├── Makefile ├── CONTRIBUTING.md └── setup.cfg /docs/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /docs/authors.md: -------------------------------------------------------------------------------- 1 | ../AUTHORS.md -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /docs/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.md -------------------------------------------------------------------------------- /docs/history.md: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.md -------------------------------------------------------------------------------- /docs/contributing.md: 
-------------------------------------------------------------------------------- 1 | ../CONTRIBUTING.md -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | source activate medicare_utils 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /medicare_utils.rst 2 | /medicare_utils.*.rst 3 | /modules.rst 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.0.1 (2018-03-01) 4 | 5 | - First release on PyPI. 6 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | To use `medicare_utils` in a project: 4 | 5 | ```py 6 | import medicare_utils as med 7 | ``` 8 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | Sphinx==1.7.0 2 | bumpversion==0.5.3 3 | coverage==4.5.1 4 | flake8==3.5.0 5 | pip==21.1 6 | tox==2.9.1 7 | wheel==0.30.0 8 | yapf==0.20.2 9 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Credits 2 | 3 | ## Development Lead 4 | 5 | - Kyle Barron 6 | 7 | ## Contributors 8 | 9 | None yet. Why not be the first? 
# -*- coding: utf-8 -*-
"""Top-level package for medicare_utils.

Re-exports the public API so callers can write, e.g.,
``import medicare_utils as med`` and use ``med.MedicareDF``,
``med.codebook``, ``med.fpath``, etc.
"""

__author__ = """Kyle Barron"""
__email__ = 'barronk@mit.edu'
# NOTE(review): __version__ is '0.1.0' here (and in setup.py) but
# CHANGELOG.md lists 0.0.1 as the only release — confirm which is current.
__version__ = '0.1.0'

# NOTE(review): the repository tree shows codebook.py, utils.py, and
# parquet.py under medicare_utils/, but no codes.py or medicare_df.py —
# confirm those modules exist, otherwise the next two imports fail at
# package import time.
from .codes import icd9, hcpcs, npi
from .utils import fpath, pq_vars
from .medicare_df import MedicareDF
from .codebook import codebook
from . import parquet
end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * medicare_utils version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | #### Code Sample, a copy-pastable example if possible 8 | 9 | ```python 10 | # Your code here 11 | 12 | ``` 13 | #### Problem description 14 | 15 | [this should explain **why** the current behaviour is a problem and why the expected output is a better solution.] 16 | 17 | **Note**: Many problems can be resolved by simply upgrading `medicare_utils` to the latest version. Before submitting, please check if that solution works for you. 18 | 19 | #### Expected Output 20 | 21 | #### Package Version 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 18 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = medicare_utils 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
class TestCodebook(object):
    """Sanity checks on the dictionaries returned by ``codebook()``."""

    @pytest.fixture(params=['bsfab', 'med', 'opc'])
    def d(self, request):
        # One codebook dict per supported data type.
        return codebook(request.param)

    def test_unique_varnames(self, d):
        # Dict keys are unique by construction; this guards against a
        # regression if codebook() ever returns a different container.
        varnames = list(d)
        assert len(varnames) == len(set(varnames))

    def test_dict_keys(self, d):
        # Every variable entry must contain exactly the keys
        # 'name' and 'values'.
        for entry in d.values():
            keys = list(entry)
            assert len(keys) == 2
            assert 'name' in keys
            assert 'values' in keys
medicare\_utils.codebook 5 | ------------------------ 6 | 7 | .. automodule:: medicare_utils.codebook 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | medicare\_utils.codes 13 | --------------------- 14 | 15 | .. automodule:: medicare_utils.codes 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | medicare\_utils.medicare\_df 21 | ---------------------------- 22 | 23 | .. automodule:: medicare_utils.medicare_df 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | medicare\_utils.parquet 29 | ----------------------- 30 | 31 | .. automodule:: medicare_utils.parquet 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | medicare\_utils.utils 37 | --------------------- 38 | 39 | .. automodule:: medicare_utils.utils 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=medicare_utils 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018, Kyle Barron 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quick Start guide 2 | 3 | ## Importing the package 4 | 5 | First, make sure you've [installed](installation.html) `medicare_utils`. 
6 | Then to use the package, you need to import it: 7 | 8 | ```py 9 | import medicare_utils as med 10 | ``` 11 | 12 | The `as med` means that you can refer to the package as `med` instead of `medicare_utils` from here on. 13 | 14 | ## Data extracts 15 | 16 | Data extracts are started with the `med.MedicareDF` function. For example, I can begin an extract using 1% sample data and for the years 2010-2012 with: 17 | ```py 18 | mdf = med.MedicareDF(percent=1, years=range(2010, 2013)) 19 | ``` 20 | 21 | Note that the `range` function includes integers up to but not including the second argument. 22 | 23 | Then I can get a cohort of white women aged 66-75 24 | 25 | 26 | In recent years, Python has become the `fastest growing major programming language `_, largely due to its widespread use among data scientists. This popularity has fostered packages that work with data, such as `Pandas `_, the standard for in-memory data analysis. A newer package, `Dask `_, has been developed to parallelize Pandas operations and work with data larger than memory. 
def main():
    """Internal code to convert Jean's crosswalks to JSON files.

    For each data type, reads ``harm{data_type}.dta`` from the current
    directory, keeps rows with ``year >= 1999`` and a non-empty canonical
    name (``cname``), and writes ``{data_type}.json`` shaped as::

        {cname: {'desc': varlab,
                 str(year): {'name': ..., 'type': ..., 'format': ...}}}
    """
    data_types = [
        'carc', 'carl', 'dmec', 'dmel', 'hhac', 'hhar', 'hosc',
        'hosr', 'ipc', 'ipr', 'med', 'opc', 'opr', 'snfc', 'snfr']
    for data_type in data_types:
        df = pd.read_stata(f'harm{data_type}.dta')
        df = df.sort_values(['cname', 'year'])
        df = df[df['year'] >= 1999]
        df = df[df['cname'] != '']

        xw = {}
        # itertuples is far faster than positional .iloc[i] in a Python loop.
        for row in df.itertuples(index=False):
            year = str(row.year)

            entry = xw.setdefault(row.cname, {})
            # Preserves the original behavior: the LAST row's label wins
            # for 'desc' ...
            entry['desc'] = row.varlab
            # ... while the FIRST row seen for a given year wins for
            # name/type/format.
            year_entry = entry.setdefault(year, {})
            year_entry.setdefault('name', row.name)
            year_entry.setdefault('type', row.type)
            year_entry.setdefault('format', row.format)

        with open(f'{data_type}.json', 'w') as f:
            json.dump(xw, f, sort_keys=True, indent=4)


if __name__ == '__main__':
    main()
.installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | **This package only supports Python 3.6 or higher.** You can find out the version of Python installed by running `python --version` in your terminal. The first two numbers must be 3.6 or 3.7. 
4 | 5 | ``` 6 | $ python --version 7 | Python 3.6.4 :: Anaconda custom (64-bit) 8 | ``` 9 | 10 | ## Stable release 11 | 12 | To install medicare_utils, run this command in your terminal: 13 | 14 | ``` 15 | $ pip install medicare_utils --upgrade 16 | ``` 17 | 18 | This is the preferred method to install medicare_utils, as it will always install the most recent stable release. 19 | 20 | If you don't have [`pip`](https://pip.pypa.io) installed, I recommend installing the [Anaconda distribution](https://www.anaconda.com/download), which will install a wide variety of helpful data science packages. 21 | Otherwise, this [Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/) can guide you through the process of installing `pip` manually. 22 | 23 | ## Development version 24 | 25 | If you want the newest version available, you can install direct from the Github repository with: 26 | ``` 27 | $ pip install git+https://github.com/kylebarron/medicare_utils --upgrade 28 | ``` 29 | 30 | ## From sources 31 | 32 | The sources for medicare_utils can be downloaded from the [Github repo](https://github.com/kylebarron/medicare_utils). 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""The setup script."""

from setuptools import setup, find_packages

with open('README.md') as readme_file:
    readme = readme_file.read()

with open('CHANGELOG.md') as history_file:
    history = history_file.read()


def _read_requirements(path):
    """Return the non-empty, whitespace-stripped requirement lines of *path*.

    The previous implementation dropped the last character of every line
    (``x[:-1]``), which corrupts the final requirement when the file has no
    trailing newline and leaves blank entries in the list.
    """
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]


requirements = _read_requirements('requirements.txt')
test_requirements = _read_requirements('requirements_dev.txt')

setup_requirements = [
    'setuptools >= 38.6.0',
    'twine >= 1.11.0',
]

setup(
    author="Kyle Barron",
    author_email='barronk@mit.edu',
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Programming Language :: Python :: 3',
        # NOTE(review): the docs state Python >= 3.6 is required and the
        # package uses f-strings, so the 3.4/3.5 classifiers look stale —
        # confirm before a release.
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    description="Scripts to assist working with Medicare data.",
    install_requires=requirements,
    license="MIT license",
    long_description=readme + '\n\n' + history,
    long_description_content_type='text/markdown',
    include_package_data=True,
    keywords='medicare_utils',
    name='medicare_utils',
    packages=find_packages(include=['medicare_utils']),
    setup_requires=setup_requirements,
    test_suite='tests',
    tests_require=test_requirements,
    url='https://github.com/kylebarron/medicare_utils',
    version='0.1.0',
    zip_safe=False,
)
This is easiest with: 28 | ``` 29 | conda install -c conda-forge python-snappy 30 | ``` 31 | 32 | Otherwise you need to first install the Snappy C library and then run 33 | ``` 34 | pip install python-snappy 35 | ``` 36 | 37 | ## Usage 38 | 39 | The class is initialized with 40 | ```py 41 | import medicare_utils as med 42 | mdf = med.MedicareDF('05', range(2010, 2013)) 43 | mdf.get_cohort(gender='female', ages=range(65, 75)) 44 | mdf.search_for_codes(2010, 'med', icd9_diag='41071') 45 | ``` 46 | 47 | It has attributes that refer to different levels of the data, when applicable: 48 | 49 | - `mdf.pl`: patient-level data. Here the index of the data is `bene_id` for data post-2005, or `ehic` for data pre-2005. 50 | - `mdf.cl`: claim-level data. 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 28 | 29 | help: 30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 31 | 32 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 33 | 34 | clean-build: ## remove build artifacts 35 | rm -fr build/ 36 | rm -fr dist/ 37 | rm -fr .eggs/ 38 | find . -name '*.egg-info' -exec rm -fr {} + 39 | find . 
-name '*.egg' -exec rm -f {} + 40 | 41 | clean-pyc: ## remove Python file artifacts 42 | find . -name '*.pyc' -exec rm -f {} + 43 | find . -name '*.pyo' -exec rm -f {} + 44 | find . -name '*~' -exec rm -f {} + 45 | find . -name '__pycache__' -exec rm -fr {} + 46 | 47 | clean-test: ## remove test and coverage artifacts 48 | rm -fr .tox/ 49 | rm -f .coverage 50 | rm -fr htmlcov/ 51 | 52 | lint: ## check style with flake8 53 | flake8 medicare_utils tests 54 | 55 | test: ## run tests quickly with the default Python 56 | python setup.py test 57 | 58 | test-all: ## run tests on every Python version with tox 59 | tox 60 | 61 | coverage: ## check code coverage quickly with the default Python 62 | coverage run --source medicare_utils setup.py test 63 | coverage report -m 64 | coverage html 65 | $(BROWSER) htmlcov/index.html 66 | 67 | docs: ## generate Sphinx HTML documentation, including API docs 68 | rm -f docs/medicare_utils.rst 69 | rm -f docs/modules.rst 70 | sphinx-apidoc -o docs/ medicare_utils 71 | $(MAKE) -C docs clean 72 | $(MAKE) -C docs html 73 | $(BROWSER) docs/_build/html/index.html 74 | 75 | servedocs: docs ## compile the docs watching for changes 76 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
def codebook(data_type: str) -> dict:
    """Load the variable codebook for a given Medicare file type.

    Args:
        data_type: type of file to get the codebook for. One of ``bsfab``
            (Beneficiary Summary File, Base segment), ``med`` (MedPAR File),
            or ``opc`` (Outpatient File, Claims segment).

    Returns:
        ``dict`` keyed by variable name. Each value is another ``dict`` with
        two keys: ``name``, the descriptive name of the variable, and
        ``values``, itself a ``dict`` mapping variable values to their value
        labels.

    Raises:
        NotImplementedError: if no codebook has been added for ``data_type``.

    Examples:
        To get the labels of the values of ``clm_type`` in the ``med`` file:

        .. code-block:: python

            >>> import medicare_utils as med
            >>> cbk = med.codebook('med')['clm_type']['values']
            >>> cbk['40']
            'Outpatient claim'
    """
    json_path = pkg.resource_filename(
        'medicare_utils', f'metadata/codebook/{data_type}.json')

    try:
        f = open(json_path)
    except FileNotFoundError:
        raise NotImplementedError(f'Haven\'t added {data_type} codebook yet')

    with f:
        return json.load(f)
Python is an easier and faster alternative. 22 | 23 | 24 | Creating a data extract can be done in three lines of code: 25 | 26 | .. code-block:: python 27 | 28 | import re 29 | import medicare_utils as med 30 | mdf = med.MedicareDF( 31 | percent='100', 32 | years=range(2008, 2014)) 33 | mdf.get_cohort( 34 | gender='male', 35 | ages=range(65, 75), 36 | buyin_val=['3', 'C'], 37 | join='outer', 38 | keep_vars=['bene_dob']) 39 | mdf.search_for_codes( 40 | data_types=['med', 'opc'], 41 | icd9_dx=re.compile(r'^410'), 42 | icd9_dx_max_cols=1, 43 | collapse_codes=True, 44 | keep_vars={'med': ['medparid', 'admsndt', 'dschrgdt']}, 45 | rename={'icd9_dx': 'ami'}) 46 | 47 | The resulting data extract consists of a patient-level file of patients who are: 48 | 49 | - Male 50 | - Aged 65-74 (inclusive) in any year from 2008 to 2013 51 | - Continuously enrolled in fee-for-service Medicare in any year from 2008 to 2013 (i.e. :code:`buyin_val` either :code:`3` or :code:`C`) 52 | 53 | and a claim-level file of patients who were included in the above cohort and furthermore had a primary diagnosis code of AMI in either the `MedPAR `_ or `Outpatient claims `_ files. The patient-level file is accessed with :code:`mdf.pl` and the claim-level file is accessed with :code:`mdf.cl`. 54 | 55 | This package also provides: 56 | 57 | - Classes to work with NPI, ICD-9, and HCPCS codes. These commands will automatically download these data files for you. [#copyright]_ 58 | - Codebooks for values of categorical variables. 59 | - A simple interface to convert data files from Stata format to the modern Parquet format. 60 | 61 | This documentation aims to walk through everything needed to run these routines. Then you can keep working with these extracts in Python or easily export them to Stata's :code:`.dta` format. Head to the :doc:`Quick Start guide ` to get started. 62 | 63 | Caveats 64 | ------- 65 | 66 | This package contains no Medicare data or private information. 
It assumes you already have access to Medicare data. 67 | 68 | This package was originally developed for use on the National Bureau of Economic 69 | Research's servers, but portions of the package may be useful for third parties 70 | as well. 71 | 72 | 73 | Indices and tables 74 | ------------------ 75 | 76 | * :ref:`genindex` 77 | * :ref:`modindex` 78 | * :ref:`search` 79 | 80 | .. rubric:: Footnotes 81 | 82 | .. [#copyright] Datasets with HCPCS codes and short descriptions from 2003 to the present are freely available on the CMS website in their `Relative Value Files `_. These CMS files are released under the `End User Point and Click Agreement `_. In order to not run afoul of this license agreement, this package does not distribute HCPCS codes. Rather, it provides code for the user to download and work with them. By using the HCPCS functions in this package, you agree to the above Agreement. 83 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every 4 | little bit helps, and credit will always be given. 5 | 6 | You can contribute in many ways: 7 | 8 | ## Types of Contributions 9 | 10 | ### Report Bugs 11 | 12 | Report bugs at . 13 | 14 | If you are reporting a bug, please include: 15 | 16 | - Your operating system name and version. 17 | - Any details about your local setup that might be helpful in 18 | troubleshooting. 19 | - Detailed steps to reproduce the bug. 20 | 21 | ### Fix Bugs or Implement Features 22 | 23 | Look through the GitHub issues for bugs. Most issues are open to whoever wants to implement it, but comment on it so that I know you're working on it. 
24 | 25 | ### Write Documentation 26 | 27 | medicare_utils could always use more documentation, whether as part of 28 | the official medicare_utils docs, in docstrings, or even on the web in 29 | blog posts, articles, and such. 30 | 31 | ### Submit Feedback 32 | 33 | The best way to send feedback is to file an issue at 34 | . 35 | 36 | If you are proposing a feature: 37 | 38 | - Explain in detail how it would work. 39 | - Keep the scope as narrow as possible, to make it easier to 40 | implement. 41 | - Remember that this is a volunteer-driven project, and that 42 | contributions are welcome :) 43 | 44 | ## Get Started! 45 | 46 | Ready to contribute? Here's how to set up medicare_utils for local 47 | development. 48 | 49 | 1. Fork the medicare_utils repo on GitHub. 50 | 2. Clone your fork locally: 51 | 52 | ``` 53 | $ git clone git@github.com:your_github_user/medicare_utils.git 54 | ``` 55 | 56 | 3. Install your local copy into a Conda environment. If you don't have Conda installed, install [Anaconda](https://www.anaconda.com/download/) or [Miniconda](https://conda.io/miniconda.html) first. Then set up your fork for local development with: 57 | 58 | ``` 59 | $ cd medicare_utils/ 60 | $ conda create env -f environment.yml 61 | $ source activate medicare_utils 62 | $ python setup.py develop 63 | ``` 64 | 65 | 4. Create a branch for local development: 66 | 67 | ``` 68 | $ git checkout -b name-of-your-bugfix-or-feature 69 | ``` 70 | 71 | Now you can make your changes locally. 72 | 73 | 5. When you're done making changes, check that your changes pass 74 | flake8 and the tests, including testing other Python versions with 75 | tox: 76 | 77 | ``` 78 | $ flake8 medicare_utils tests 79 | $ python setup.py test or py.test 80 | $ tox 81 | ``` 82 | 83 | 6. Commit your changes and push your branch to GitHub: 84 | 85 | ``` 86 | $ git add . 87 | $ git commit -m "Your detailed description of your changes." 88 | $ git push origin name-of-your-bugfix-or-feature 89 | ``` 90 | 91 | 7. 
Submit a pull request through the GitHub website. 92 | 93 | ## Pull Request Guidelines 94 | 95 | Before you submit a pull request, check that it meets these guidelines: 96 | 97 | 1. The pull request should include tests. 98 | 2. If the pull request adds functionality, the docs should be updated. 99 | Put your new functionality into a function with a docstring, and add 100 | the feature to the list in README.rst. 101 | 3. The pull request should work for Python 3.6 and 3.7. Check 102 | and 103 | make sure that the tests pass for all supported Python versions. 104 | 105 | ## Tips 106 | 107 | To run a subset of tests: 108 | 109 | $ python -m unittest tests.test_medicare_utils 110 | 111 | ## Deploying 112 | 113 | A reminder for the maintainers on how to deploy. Make sure all your 114 | changes are committed (including an entry in `CHANGELOG.md`). Then run: 115 | 116 | ```bash 117 | $ bumpversion patch # possible: major / minor / patch 118 | $ git push 119 | $ git push --tags 120 | ``` 121 | 122 | Travis will then deploy to PyPI if tests pass. 123 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # medicare_utils documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another 17 | # directory, add these directories to sys.path here. 
If the directory is 18 | # relative to the documentation root, use os.path.abspath to make it 19 | # absolute, like shown here. 20 | # 21 | import os 22 | import sys 23 | sys.path.insert(0, os.path.abspath('..')) 24 | 25 | import sphinx_bootstrap_theme 26 | import medicare_utils 27 | 28 | # -- General configuration --------------------------------------------- 29 | 30 | # If your documentation needs a minimal Sphinx version, state it here. 31 | # 32 | # needs_sphinx = '1.0' 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.coverage', 39 | 'sphinx.ext.doctest', 40 | 'sphinx.ext.githubpages', 41 | 'sphinx.ext.mathjax', 42 | 'sphinx.ext.napoleon', 43 | 'sphinx.ext.viewcode', 44 | # Must be loaded after napoleon 45 | 'sphinx_autodoc_typehints' 46 | ] 47 | 48 | napoleon_google_docstring = True 49 | # Must be True to work with sphinx_autodoc_typehints 50 | napoleon_use_param = True 51 | napoleon_use_ivar = True 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ['_templates'] 55 | 56 | # The suffix(es) of source filenames. 57 | # You can specify multiple suffix as a list of string: 58 | # 59 | source_suffix = ['.rst', '.md'] 60 | 61 | # The master toctree document. 62 | master_doc = 'index' 63 | 64 | # General information about the project. 65 | project = u'medicare_utils' 66 | copyright = u"2018, Kyle Barron" 67 | author = u"Kyle Barron" 68 | 69 | # The version info for the project you're documenting, acts as replacement 70 | # for |version| and |release|, also used in various other places throughout 71 | # the built documents. 72 | # 73 | # The short X.Y version. 74 | version = medicare_utils.__version__ 75 | # The full version, including alpha/beta/rc tags. 
76 | release = medicare_utils.__version__ 77 | 78 | # The language for content autogenerated by Sphinx. Refer to documentation 79 | # for a list of supported languages. 80 | # 81 | # This is also used if you do content translation via gettext catalogs. 82 | # Usually you set "language" from the command line for these cases. 83 | language = None 84 | 85 | # List of patterns, relative to source directory, that match files and 86 | # directories to ignore when looking for source files. 87 | # This patterns also effect to html_static_path and html_extra_path 88 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 89 | 90 | # The name of the Pygments (syntax highlighting) style to use. 91 | pygments_style = 'sphinx' 92 | 93 | # If true, `todo` and `todoList` produce output, else they produce nothing. 94 | todo_include_todos = False 95 | 96 | 97 | # -- Options for HTML output ------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. See the documentation for 100 | # a list of builtin themes. 101 | # 102 | html_theme = 'bootstrap' 103 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 104 | 105 | # Theme options are theme-specific and customize the look and feel of a 106 | # theme further. For a list of options available for each theme, see the 107 | # documentation. 108 | # 109 | # html_theme_options = {} 110 | 111 | # Add any paths that contain custom static files (such as style sheets) here, 112 | # relative to this directory. They are copied after the builtin static files, 113 | # so a file named "default.css" will overwrite the builtin "default.css". 114 | html_static_path = ['_static'] 115 | 116 | 117 | # -- Options for HTMLHelp output --------------------------------------- 118 | 119 | # Output file base name for HTML help builder. 
120 | htmlhelp_basename = 'medicare_utilsdoc' 121 | 122 | 123 | # -- Options for LaTeX output ------------------------------------------ 124 | 125 | latex_elements = { 126 | # The paper size ('letterpaper' or 'a4paper'). 127 | # 128 | # 'papersize': 'letterpaper', 129 | 130 | # The font size ('10pt', '11pt' or '12pt'). 131 | # 132 | # 'pointsize': '10pt', 133 | 134 | # Additional stuff for the LaTeX preamble. 135 | # 136 | # 'preamble': '', 137 | 138 | # Latex figure (float) alignment 139 | # 140 | # 'figure_align': 'htbp', 141 | } 142 | 143 | # Grouping the document tree into LaTeX files. List of tuples 144 | # (source start file, target name, title, author, documentclass 145 | # [howto, manual, or own class]). 146 | latex_documents = [ 147 | (master_doc, 'medicare_utils.tex', 148 | u'medicare_utils Documentation', 149 | u'Kyle Barron', 'manual'), 150 | ] 151 | 152 | 153 | # -- Options for manual page output ------------------------------------ 154 | 155 | # One entry per manual page. List of tuples 156 | # (source start file, name, description, authors, manual section). 157 | man_pages = [ 158 | (master_doc, 'medicare_utils', 159 | u'medicare_utils Documentation', 160 | [author], 1) 161 | ] 162 | 163 | 164 | # -- Options for Texinfo output ---------------------------------------- 165 | 166 | # Grouping the document tree into Texinfo files. 
List of tuples 167 | # (source start file, target name, title, author, 168 | # dir menu entry, description, category) 169 | texinfo_documents = [ 170 | (master_doc, 'medicare_utils', 171 | u'medicare_utils Documentation', 172 | author, 173 | 'medicare_utils', 174 | 'One line description of project.', 175 | 'Miscellaneous'), 176 | ] 177 | 178 | source_parsers = { 179 | '.md': 'recommonmark.parser.CommonMarkParser', 180 | } 181 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.0.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:medicare_utils/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | [aliases] 21 | # Define setup.py command aliases here 22 | 23 | [pycodestyle] 24 | max-line-length = 80 25 | 26 | [yapf] 27 | # Align closing bracket with visual indentation. 28 | align_closing_bracket_with_visual_indent=False 29 | 30 | # Allow dictionary keys to exist on multiple lines. For example: 31 | # 32 | # x = { 33 | # ('this is the first element of a tuple', 34 | # 'this is the second element of a tuple'): 35 | # value, 36 | # } 37 | allow_multiline_dictionary_keys=False 38 | 39 | # Allow lambdas to be formatted on more than one line. 40 | allow_multiline_lambdas=False 41 | 42 | # Allow splits before the dictionary value. 43 | allow_split_before_dict_value=True 44 | 45 | # Insert a blank line before a class-level docstring. 46 | blank_line_before_class_docstring=False 47 | 48 | # Insert a blank line before a 'def' or 'class' immediately nested 49 | # within another 'def' or 'class'. 
For example: 50 | # 51 | # class Foo: 52 | # # <------ this blank line 53 | # def method(): 54 | # ... 55 | blank_line_before_nested_class_or_def=False 56 | 57 | # Do not split consecutive brackets. Only relevant when 58 | # dedent_closing_brackets is set. For example: 59 | # 60 | # call_func_that_takes_a_dict( 61 | # { 62 | # 'key1': 'value1', 63 | # 'key2': 'value2', 64 | # } 65 | # ) 66 | # 67 | # would reformat to: 68 | # 69 | # call_func_that_takes_a_dict({ 70 | # 'key1': 'value1', 71 | # 'key2': 'value2', 72 | # }) 73 | coalesce_brackets=True 74 | 75 | # The column limit. 76 | column_limit=80 77 | 78 | # Indent width used for line continuations. 79 | continuation_indent_width=4 80 | 81 | # Put closing brackets on a separate line, dedented, if the bracketed 82 | # expression can't fit in a single line. Applies to all kinds of brackets, 83 | # including function definitions and calls. For example: 84 | # 85 | # config = { 86 | # 'key1': 'value1', 87 | # 'key2': 'value2', 88 | # } # <--- this bracket is dedented and on a separate line 89 | # 90 | # time_series = self.remote_client.query_entity_counters( 91 | # entity='dev3246.region1', 92 | # key='dns.query_latency_tcp', 93 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 94 | # start_ts=now()-timedelta(days=3), 95 | # end_ts=now(), 96 | # ) # <--- this bracket is dedented and on a separate line 97 | dedent_closing_brackets=False 98 | 99 | # Place each dictionary entry onto its own line. 100 | each_dict_entry_on_separate_line=True 101 | 102 | # The regex for an i18n comment. The presence of this comment stops 103 | # reformatting of that line, because the comments are required to be 104 | # next to the string they translate. 105 | i18n_comment= 106 | 107 | # The i18n function call names. The presence of this function stops 108 | # reformattting on that line, because the string it has cannot be moved 109 | # away from the i18n comment. 
110 | i18n_function_call= 111 | 112 | # Indent the dictionary value if it cannot fit on the same line as the 113 | # dictionary key. For example: 114 | # 115 | # config = { 116 | # 'key1': 117 | # 'value1', 118 | # 'key2': value1 + 119 | # value2, 120 | # } 121 | indent_dictionary_value=True 122 | 123 | # The number of columns to use for indentation. 124 | indent_width=4 125 | 126 | # Join short lines into one line. E.g., single line 'if' statements. 127 | join_multiple_lines=True 128 | 129 | # Do not include spaces around selected binary operators. For example: 130 | # 131 | # 1 + 2 * 3 - 4 / 5 132 | # 133 | # will be formatted as follows when configured with a value "*,/": 134 | # 135 | # 1 + 2*3 - 4/5 136 | # 137 | no_spaces_around_selected_binary_operators=set() 138 | 139 | # Use spaces around default or named assigns. 140 | spaces_around_default_or_named_assign=False 141 | 142 | # Use spaces around the power operator. 143 | spaces_around_power_operator=True 144 | 145 | # The number of spaces required before a trailing comment. 146 | spaces_before_comment=2 147 | 148 | # Insert a space between the ending comma and closing bracket of a list, 149 | # etc. 150 | space_between_ending_comma_and_closing_bracket=True 151 | 152 | # Split before arguments if the argument list is terminated by a 153 | # comma. 154 | split_arguments_when_comma_terminated=False 155 | 156 | # Set to True to prefer splitting before '&', '|' or '^' rather than 157 | # after. 158 | split_before_bitwise_operator=True 159 | 160 | split_before_closing_bracket=False 161 | 162 | # Split before a dictionary or set generator (comp_for). For example, note 163 | # the split before the 'for': 164 | # 165 | # foo = { 166 | # variable: 'Hello world, have a nice day!' 167 | # for variable in bar if variable != 42 168 | # } 169 | split_before_dict_set_generator=True 170 | 171 | # Split after the opening paren which surrounds an expression if it doesn't 172 | # fit on a single line. 
173 | split_before_expression_after_opening_paren=False 174 | 175 | # If an argument / parameter list is going to be split, then split before 176 | # the first argument. 177 | split_before_first_argument=True 178 | 179 | # Set to True to prefer splitting before 'and' or 'or' rather than 180 | # after. 181 | split_before_logical_operator=True 182 | 183 | # Split named assignments onto individual lines. 184 | split_before_named_assigns=False 185 | 186 | # Set to True to split list comprehensions and generators that have 187 | # non-trivial expressions and multiple clauses before each of these 188 | # clauses. For example: 189 | # 190 | # result = [ 191 | # a_long_var + 100 for a_long_var in xrange(1000) 192 | # if a_long_var % 10] 193 | # 194 | # would reformat to something like: 195 | # 196 | # result = [ 197 | # a_long_var + 100 198 | # for a_long_var in xrange(1000) 199 | # if a_long_var % 10] 200 | split_complex_comprehension=True 201 | 202 | # The penalty for splitting right after the opening bracket. 203 | split_penalty_after_opening_bracket=30 204 | 205 | # The penalty for splitting the line after a unary operator. 206 | split_penalty_after_unary_operator=10000 207 | 208 | # The penalty for splitting right before an if expression. 209 | split_penalty_before_if_expr=30 210 | 211 | # The penalty of splitting the line around the '&', '|', and '^' 212 | # operators. 213 | split_penalty_bitwise_operator=300 214 | 215 | # The penalty for splitting a list comprehension or generator 216 | # expression. 217 | split_penalty_comprehension=80 218 | 219 | # The penalty for characters over the column limit. 220 | split_penalty_excess_character=4500 221 | 222 | # The penalty incurred by adding a line split to the unwrapped line. The 223 | # more line splits added the higher the penalty. 224 | split_penalty_for_added_line_split=30 225 | 226 | # The penalty of splitting a list of "import as" names. 
For example: 227 | # 228 | # from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, 229 | # long_argument_2, 230 | # long_argument_3) 231 | # 232 | # would reformat to something like: 233 | # 234 | # from a_very_long_or_indented_module_name_yada_yad import ( 235 | # long_argument_1, long_argument_2, long_argument_3) 236 | split_penalty_import_names=0 237 | 238 | # The penalty of splitting the line around the 'and' and 'or' 239 | # operators. 240 | split_penalty_logical_operator=300 241 | 242 | # Use the Tab character for indentation. 243 | use_tabs=False 244 | 245 | -------------------------------------------------------------------------------- /medicare_utils/utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | from pathlib import Path 3 | from textwrap import dedent, fill 4 | from typing import Union 5 | 6 | allowed_pcts = ['0001', '01', '05', '20', '100'] 7 | pct_dict = {0.01: '0001', 1: '01', 5: '05', 20: '20', 100: '100'} 8 | 9 | 10 | def pq_vars(ParquetFile): 11 | return ParquetFile.schema.names 12 | 13 | 14 | def _mywrap(text: str) -> str: 15 | text = dedent(text) 16 | lines = text.split('\n') 17 | lines = [ 18 | fill(x, replace_whitespace=False, subsequent_indent=' ') 19 | for x in lines] 20 | text = '\n'.join(lines) 21 | return text 22 | 23 | 24 | def fpath(percent, year, data_type, root_path, extension, new_style): 25 | """Generate path to Medicare files 26 | 27 | Args: 28 | percent: 29 | percent sample of data. Can be {'0001', '01', '05', '20', '100'} 30 | year: year of data. 
31 | data_type: 32 | desired type of file 33 | 34 | - ``bsfab`` (`Beneficiary Summary File, Base segment`_) 35 | - ``bsfcc`` (`Beneficiary Summary File, Chronic Conditions segment`_) 36 | - ``bsfcu`` (`Beneficiary Summary File, Cost & Use segment`_) 37 | - ``bsfd`` (`Beneficiary Summary File, National Death Index segment`_) 38 | - ``carc`` (`Carrier File, Claims segment`_) 39 | - ``carl`` (`Carrier File, Line segment`_) 40 | - ``den`` (Denominator File) 41 | - ``dmec`` (`Durable Medical Equipment File, Claims segment`_) 42 | - ``dmel`` (`Durable Medical Equipment File, Line segment`_) 43 | - ``hhac`` (`Home Health Agency File, Claims segment`_) 44 | - ``hhar`` (`Home Health Agency File, Revenue Center segment`_) 45 | - ``hosc`` (`Hospice File, Claims segment`_) 46 | - ``hosr`` (`Hospice File, Revenue Center segment`_) 47 | - ``ipc`` (`Inpatient File, Claims segment`_) 48 | - ``ipr`` (`Inpatient File, Revenue Center segment`_) 49 | - ``med`` (`MedPAR File`_) 50 | - ``opc`` (`Outpatient File, Claims segment`_) 51 | - ``opr`` (`Outpatient File, Revenue Center segment`_) 52 | - ``snfc`` (`Skilled Nursing Facility File, Claims segment`_) 53 | - ``snfr`` (`Skilled Nursing Facility File, Revenue Center segment`_) 54 | - ``xw`` (Crosswalks files for ``ehic`` - ``bene_id``) 55 | - ``xw_bsf`` (Crosswalks files for ``ehic`` - ``bene_id``) 56 | 57 | .. _`Beneficiary Summary File, Base segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#base-abcd-segment_2 58 | .. _`Beneficiary Summary File, Chronic Conditions segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#chronic-conditions-segment_2 59 | .. _`Beneficiary Summary File, Cost & Use segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#cost-and-use-segment_1 60 | .. _`Beneficiary Summary File, National Death Index segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#national-death-index-segment_1 61 | .. 
_`Carrier File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/carrier-rif/#carrier-rif_1 62 | .. _`Carrier File, Line segment`: https://kylebarron.github.io/medicare-documentation/resdac/carrier-rif/#line-file 63 | .. _`Durable Medical Equipment File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/dme-rif/#durable-medical-equipment-rif_1 64 | .. _`Durable Medical Equipment File, Line segment`: https://kylebarron.github.io/medicare-documentation/resdac/dme-rif/#line-file 65 | .. _`Home Health Agency File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/hha-rif/#home-health-agency-rif_1 66 | .. _`Home Health Agency File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/hha-rif/#revenue-center-file 67 | .. _`Hospice File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/hospice-rif/#hospice-rif_1 68 | .. _`Hospice File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/hospice-rif/#revenue-center-file 69 | .. _`Inpatient File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/ip-rif/#inpatient-rif_1 70 | .. _`Inpatient File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/ip-rif/#revenue-center-file 71 | .. _`MedPAR File`: https://kylebarron.github.io/medicare-documentation/resdac/medpar-rif/#medpar-rif_1 72 | .. _`Outpatient File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/op-rif/#outpatient-rif_1 73 | .. _`Outpatient File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/op-rif/#revenue-center-file 74 | .. _`Skilled Nursing Facility File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/snf-rif/#skilled-nursing-facility-rif_1 75 | .. 
_`Skilled Nursing Facility File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/snf-rif/#revenue-center-file 76 | 77 | root_path: top of tree for file path 78 | extension: file extension 79 | new_style: 80 | If False, matches the file names at /disk/aging/medicare/data, if 81 | True, uses simplified directory structure. 82 | Returns: 83 | (str) path to file 84 | """ 85 | 86 | # Check types 87 | if type(data_type) != str: 88 | raise TypeError('data_type must be str') 89 | 90 | try: 91 | year = int(year) 92 | except ValueError: 93 | raise TypeError('Invalid year provided') 94 | 95 | allowed_pcts = ['0001', '01', '05', '20', '100'] 96 | if percent not in allowed_pcts: 97 | msg = f'percent must be one of: {allowed_pcts}' 98 | raise ValueError(msg) 99 | 100 | if extension == '': 101 | raise ValueError('Must provide valid extension') 102 | 103 | if extension[0] != '.': 104 | extension = '.' + extension 105 | 106 | root_path = Path(root_path).expanduser().resolve() 107 | root_path /= f'{percent}pct' 108 | if data_type in ['bsfab', 'bsfcc', 'bsfcu', 'bsfd', 'carc', 'carl', 'den', 109 | 'dmec', 'dmel', 'hhac', 'hhar', 'hosc', 'hosr', 'med', 110 | 'snfc', 'snfr']: 111 | root_path /= data_type[:3] 112 | elif data_type == 'xw_bsf' and not new_style: 113 | root_path /= 'bsf' 114 | else: 115 | root_path /= data_type[:2] 116 | 117 | if new_style: 118 | root_path /= f'{year}' 119 | if data_type == 'xw': 120 | root_path /= f'ehicbenex_one{year}{extension}' 121 | elif data_type == 'xw_bsf': 122 | root_path /= f'ehicbenex_unique{year}{extension}' 123 | else: 124 | root_path /= f'{data_type}{year}{extension}' 125 | else: 126 | root_path /= f'{year}' 127 | if data_type in ['den', 'dmec', 'dmel', 'hhac', 'hhar', 'hosc', 'hosr', 128 | 'med', 'snfc', 'snfr']: 129 | root_path /= f'{data_type}{year}{extension}' 130 | 131 | elif data_type in ['bsfab', 'bsfcc', 'bsfcu', 'bsfd']: 132 | root_path /= f'1/{data_type}{year}{extension}' 133 | 134 | elif 
import pytest
import pandas as pd
import medicare_utils as med


class TestGetCohortGetVarsToload(object):
    """Tests for MedicareDF._get_cohort_get_vars_toload().

    Checks that each cohort filter argument pulls in exactly the
    beneficiary-summary variables needed to apply it.
    """

    @pytest.fixture
    def init(self):
        # Baseline keyword arguments; each test overrides individual keys.
        return {
            'gender': None,
            'ages': None,
            'races': None,
            'race_col': 'race',
            'buyin_val': None,
            'hmo_val': None,
            'keep_vars': []}

    @pytest.fixture
    def mdf(self, year, percent):
        return med.MedicareDF(percent, year)

    @pytest.fixture(params=['0001', '01', '05', '20', '100'])
    def percent(self, request):
        return request.param

    @pytest.fixture(params=[2005, 2012])
    def year(self, request):
        return request.param

    def add_ehic(self, x, year):
        """Return the variable list, adding 'ehic' for pre-2006 years.

        Pre-2006 files are keyed on ehic rather than bene_id, so 'ehic'
        must also be loaded. Returns a new list; the input is not mutated.
        """
        if year >= 2006:
            return x
        return [*x, 'ehic']

    def assert_exp(self, mdf, init, exp, year):
        """Assert the computed set of variables to load equals ``exp``."""
        res = mdf._get_cohort_get_vars_toload(**init)
        exp = self.add_ehic(exp, year)
        assert set(res[year]) == set(exp)

    # Only need to adjust these inputs
    @pytest.mark.parametrize(
        'inputs,extra_vars',
        [
            ({'gender': '1'}, ['sex']),
            ({'ages': range(70, 80)}, ['age']),
            ({'races': ['1'], 'race_col': 'race'}, ['race']),
            ({'races': ['1'], 'race_col': 'rti_race_cd'}, ['rti_race_cd']),
            ({'buyin_val': ['1', '2']}, [f'buyin{str(i).zfill(2)}' for i in range(1, 13)]),
            ({'hmo_val': ['1', '2']}, [f'hmoind{str(i).zfill(2)}' for i in range(1, 13)]),
        ]) # yapf: disable
    def test_gender(self, year, mdf, init, inputs, extra_vars):
        init.update(inputs)
        exp = ['bene_id', *extra_vars]
        self.assert_exp(mdf, init, exp, year)


class TestGetCohortExtractEachYear(object):
    """Tests for a single year of cohort extraction."""

    @pytest.fixture
    def init(self):
        # Baseline keyword arguments; each test overrides individual keys.
        return {
            'gender': None,
            'ages': None,
            'races': None,
            'rti_race': False,
            'buyin_val': None,
            'hmo_val': None,
            'join': 'outer',
            'keep_vars': [],
            'dask': False,
            'verbose': False}

    @pytest.fixture
    def full_df(self):
        """Load the full 0.01% bsfab 2012 extract used as ground truth."""
        path = med.fpath(percent='0001', year=2012, data_type='bsfab')
        cols = [
            'bene_id', 'age', 'sex', 'race', 'rti_race_cd', 'buyin01',
            'buyin02', 'buyin03', 'buyin04', 'buyin05', 'buyin06', 'buyin07',
            'buyin08', 'buyin09', 'buyin10', 'buyin11', 'buyin12', 'hmoind01',
            'hmoind02', 'hmoind03', 'hmoind04', 'hmoind05', 'hmoind06',
            'hmoind07', 'hmoind08', 'hmoind09', 'hmoind10', 'hmoind11',
            'hmoind12']
        full_df = pd.read_parquet(path, columns=cols)
        print('Finished reading 0.01% bsfab data in 2012')
        return full_df

    def setup_df(
            self,
            gender,
            ages,
            races,
            rti_race,
            buyin_val,
            hmo_val,
            join,
            keep_vars,
            dask,
            verbose,
            year_type='calendar',
            pct='0001',
            year=2012):
        """Set up to run _get_cohort_extract_each_year()

        Replicate get_cohort methods up to call of _get_cohort_extract_each_year

        Returns:
            tuple of (MedicareDF instance, kwargs dict for
            _get_cohort_extract_each_year)
        """

        mdf = med.MedicareDF(pct, year, year_type=year_type)
        # Normalize/validate the raw arguments the same way get_cohort does.
        objs = mdf._get_cohort_type_check(
            gender=gender,
            ages=ages,
            races=races,
            rti_race=rti_race,
            buyin_val=buyin_val,
            hmo_val=hmo_val,
            join=join,
            keep_vars=keep_vars,
            dask=dask,
            verbose=verbose)

        toload_vars = mdf._get_cohort_get_vars_toload(
            objs.gender, objs.ages, objs.races, objs.race_col,
            objs.buyin_val, objs.hmo_val, objs.keep_vars)

        return mdf, {
            'year': year,
            'toload_vars': toload_vars[year],
            'nobs_dropped': {
                year: {}},
            'gender': objs.gender,
            'ages': objs.ages,
            'races': objs.races,
            'race_col': objs.race_col,
            'buyin_val': objs.buyin_val,
            'hmo_val': objs.hmo_val,
            'join': objs.join,
            'keep_vars': objs.keep_vars,
            'dask': objs.dask,
            'verbose': objs.verbose}

    @pytest.mark.parametrize(
        'attrs,values,exp_vars,exp_isin_vals',
        [
            (['gender'], ['m'], ['sex'], [['1']]),
            (['gender'], ['f'], ['sex'], [['2']]),
            (['gender'], [None], ['sex'], [['0', '1', '2']]),
            (['ages'], [range(75, 85)], ['age'], [range(75, 85)]),
            (
                ['ages'],
                [[75, 76, 77, 78, 79, 80, 81, 82, 83, 84]],
                ['age'],
                [range(75, 85)]),
            (['races', 'rti_race'], ['white', False], ['race'], [['1']]),
            (['races', 'rti_race'], ['black', False], ['race'], [['2']]),
            (['races', 'rti_race'], ['asian', False], ['race'], [['4']]),
            (
                ['races', 'rti_race'],
                [['white', 'black', 'asian'], False],
                ['race'],
                [['1', '2', '4']]),
            (['races', 'rti_race'], ['white', True], ['rti_race_cd'], [['1']]),
            (['races', 'rti_race'], ['black', True], ['rti_race_cd'], [['2']]),
            (['races', 'rti_race'], ['asian', True], ['rti_race_cd'], [['4']]),
            (
                ['races', 'rti_race'],
                [['white', 'black', 'asian'], True],
                ['rti_race_cd'],
                [['1', '2', '4']]),
            (['buyin_val'], ['1'], ['buyin'], [['1']]),
            (['buyin_val'], [['1', '2', '3']], ['buyin'], [['1', '2', '3']]),
            (
                ['buyin_val'],
                [['2', '3', 'B', 'C']],
                ['buyin'],
                [['2', '3', 'B', 'C']]),
            (['buyin_val'], [['3', 'C']], ['buyin'], [['3', 'C']]),
            (['hmo_val'], ['1'], ['hmoind'], [['1']]),
            (['hmo_val'], [['1', '2', '3']], ['hmoind'], [['1', '2', '3']]),
            (
                ['hmo_val'],
                [['2', '3', 'B', 'C']],
                ['hmoind'],
                [['2', '3', 'B', 'C']]),
            (['hmo_val'], [['3', 'C']], ['hmoind'], [['3', 'C']]),
            (
                ['gender', 'ages', 'races', 'rti_race', 'buyin_val'],
                ['m', range(67, 74), ['black', 'asian'], False, ['3', 'C']],
                ['sex', 'age', 'race', 'buyin'],
                [['1'], range(67, 74), ['2', '4'], ['3', 'C']]),
            (
                ['gender', 'ages', 'races', 'rti_race', 'buyin_val'],
                ['f', range(67, 85), ['white', 'hispanic'], True, ['3', 'C']],
                ['sex', 'age', 'rti_race_cd', 'buyin'],
                [['2'], range(67, 85), ['1', '5'], ['3', 'C']]),
        ]) # yapf: disable
    def test_df_is_expected(
            self, init, full_df, attrs, values, exp_vars, exp_isin_vals):
        """Extracted cohort ids must match an equivalent pandas query."""
        for attr, value in zip(attrs, values):
            init[attr] = value

        # NOTE: don't shadow the parametrize arg `attrs`; the returned dict
        # holds the keyword arguments for _get_cohort_extract_each_year.
        mdf, kwargs = self.setup_df(**init)
        pl, nobs_dropped = mdf._get_cohort_extract_each_year(**kwargs)
        pl = pl.index

        # Build the equivalent pandas query string for the same filters.
        query = []
        for exp_var, exp_isin_val in zip(exp_vars, exp_isin_vals):
            if isinstance(exp_isin_val, range):
                exp_isin_val = list(exp_isin_val)
            if exp_var in ['buyin', 'hmoind']:
                # Monthly indicator columns: buyin01..buyin12 / hmoind01..12.
                for i in range(1, 13):
                    j = str(i).zfill(2)
                    query.append(f'{exp_var}{j}.isin({exp_isin_val})')
            else:
                query.append(f'{exp_var}.isin({exp_isin_val})')

        query = ' & '.join(query)
        expected = full_df.query(query)['bene_id']

        expected = pd.Index(expected.sort_values())
        pl = pd.Index(pl.sort_values())

        assert expected.equals(pl)
"2012": { 34 | "format": "%15s", 35 | "name": "bene_id", 36 | "type": "str15" 37 | }, 38 | "2013": { 39 | "format": "%15s", 40 | "name": "bene_id", 41 | "type": "str15" 42 | }, 43 | "desc": "encrypted 723 beneficiary id" 44 | }, 45 | "claimindex": { 46 | "2002": { 47 | "format": "%12.0g", 48 | "name": "claimindex", 49 | "type": "long" 50 | }, 51 | "2003": { 52 | "format": "%12.0g", 53 | "name": "claimindex", 54 | "type": "long" 55 | }, 56 | "2004": { 57 | "format": "%12.0g", 58 | "name": "claimindex", 59 | "type": "long" 60 | }, 61 | "2005": { 62 | "format": "%12.0g", 63 | "name": "claimindex", 64 | "type": "long" 65 | }, 66 | "desc": "claimindex" 67 | }, 68 | "clm_id": { 69 | "2006": { 70 | "format": "%15s", 71 | "name": "clm_id", 72 | "type": "str15" 73 | }, 74 | "2007": { 75 | "format": "%15s", 76 | "name": "clm_id", 77 | "type": "str15" 78 | }, 79 | "2008": { 80 | "format": "%15s", 81 | "name": "clm_id", 82 | "type": "str15" 83 | }, 84 | "2009": { 85 | "format": "%15s", 86 | "name": "clm_id", 87 | "type": "str15" 88 | }, 89 | "2010": { 90 | "format": "%15s", 91 | "name": "clm_id", 92 | "type": "str15" 93 | }, 94 | "2011": { 95 | "format": "%15s", 96 | "name": "clm_id", 97 | "type": "str15" 98 | }, 99 | "2012": { 100 | "format": "%15s", 101 | "name": "clm_id", 102 | "type": "str15" 103 | }, 104 | "2013": { 105 | "format": "%15s", 106 | "name": "clm_id", 107 | "type": "str15" 108 | }, 109 | "desc": "encrypted claim id" 110 | }, 111 | "clm_ln": { 112 | "2002": { 113 | "format": "%8.0g", 114 | "name": "cntrindex", 115 | "type": "byte" 116 | }, 117 | "2003": { 118 | "format": "%8.0g", 119 | "name": "cntrindex", 120 | "type": "byte" 121 | }, 122 | "2004": { 123 | "format": "%8.0g", 124 | "name": "cntrindex", 125 | "type": "byte" 126 | }, 127 | "2005": { 128 | "format": "%8.0g", 129 | "name": "cntrindex", 130 | "type": "byte" 131 | }, 132 | "2006": { 133 | "format": "%8.0g", 134 | "name": "clm_ln", 135 | "type": "byte" 136 | }, 137 | "2007": { 138 | "format": "%8.0g", 
139 | "name": "clm_ln", 140 | "type": "byte" 141 | }, 142 | "2008": { 143 | "format": "%8.0g", 144 | "name": "clm_ln", 145 | "type": "byte" 146 | }, 147 | "2009": { 148 | "format": "%8.0g", 149 | "name": "clm_ln", 150 | "type": "int" 151 | }, 152 | "2010": { 153 | "format": "%8.0g", 154 | "name": "clm_ln", 155 | "type": "byte" 156 | }, 157 | "2011": { 158 | "format": "%8.0g", 159 | "name": "clm_ln", 160 | "type": "int" 161 | }, 162 | "2012": { 163 | "format": "%8.0g", 164 | "name": "clm_ln", 165 | "type": "byte" 166 | }, 167 | "2013": { 168 | "format": "%8.0g", 169 | "name": "clm_ln", 170 | "type": "byte" 171 | }, 172 | "desc": "claim line number" 173 | }, 174 | "clm_type": { 175 | "2006": { 176 | "format": "%2s", 177 | "name": "clm_type", 178 | "type": "str2" 179 | }, 180 | "2007": { 181 | "format": "%2s", 182 | "name": "clm_type", 183 | "type": "str2" 184 | }, 185 | "2008": { 186 | "format": "%2s", 187 | "name": "clm_type", 188 | "type": "str2" 189 | }, 190 | "2009": { 191 | "format": "%2s", 192 | "name": "clm_type", 193 | "type": "str2" 194 | }, 195 | "2010": { 196 | "format": "%2s", 197 | "name": "clm_type", 198 | "type": "str2" 199 | }, 200 | "2011": { 201 | "format": "%2s", 202 | "name": "clm_type", 203 | "type": "str2" 204 | }, 205 | "2012": { 206 | "format": "%2s", 207 | "name": "clm_type", 208 | "type": "str2" 209 | }, 210 | "2013": { 211 | "format": "%2s", 212 | "name": "clm_type", 213 | "type": "str2" 214 | }, 215 | "desc": "nch claim type code" 216 | }, 217 | "ehic": { 218 | "2002": { 219 | "format": "%11s", 220 | "name": "ehic", 221 | "type": "str11" 222 | }, 223 | "2003": { 224 | "format": "%11s", 225 | "name": "ehic", 226 | "type": "str11" 227 | }, 228 | "2004": { 229 | "format": "%11s", 230 | "name": "ehic", 231 | "type": "str11" 232 | }, 233 | "2005": { 234 | "format": "%11s", 235 | "name": "ehic", 236 | "type": "str11" 237 | }, 238 | "desc": "" 239 | }, 240 | "hcpcs_cd": { 241 | "2002": { 242 | "format": "%5s", 243 | "name": "hcpcs_cd", 244 | 
"type": "str5" 245 | }, 246 | "2003": { 247 | "format": "%5s", 248 | "name": "hcpcs_cd", 249 | "type": "str5" 250 | }, 251 | "2004": { 252 | "format": "%5s", 253 | "name": "hcpcs_cd", 254 | "type": "str5" 255 | }, 256 | "2005": { 257 | "format": "%5s", 258 | "name": "hcpcs_cd", 259 | "type": "str5" 260 | }, 261 | "2006": { 262 | "format": "%5s", 263 | "name": "hcpcs_cd", 264 | "type": "str5" 265 | }, 266 | "2007": { 267 | "format": "%5s", 268 | "name": "hcpcs_cd", 269 | "type": "str5" 270 | }, 271 | "2008": { 272 | "format": "%5s", 273 | "name": "hcpcs_cd", 274 | "type": "str5" 275 | }, 276 | "2009": { 277 | "format": "%5s", 278 | "name": "hcpcs_cd", 279 | "type": "str5" 280 | }, 281 | "2010": { 282 | "format": "%5s", 283 | "name": "hcpcs_cd", 284 | "type": "str5" 285 | }, 286 | "2011": { 287 | "format": "%5s", 288 | "name": "hcpcs_cd", 289 | "type": "str5" 290 | }, 291 | "2012": { 292 | "format": "%5s", 293 | "name": "hcpcs_cd", 294 | "type": "str5" 295 | }, 296 | "2013": { 297 | "format": "%5s", 298 | "name": "hcpcs_cd", 299 | "type": "str5" 300 | }, 301 | "desc": "revenue center healthcare common procedure coding system" 302 | }, 303 | "rev_chrg": { 304 | "2002": { 305 | "format": "%9.0g", 306 | "name": "rev_chrg", 307 | "type": "float" 308 | }, 309 | "2003": { 310 | "format": "%10.0g", 311 | "name": "rev_chrg", 312 | "type": "double" 313 | }, 314 | "2004": { 315 | "format": "%9.0g", 316 | "name": "rev_chrg", 317 | "type": "float" 318 | }, 319 | "2005": { 320 | "format": "%9.0g", 321 | "name": "rev_chrg", 322 | "type": "float" 323 | }, 324 | "2006": { 325 | "format": "%9.0g", 326 | "name": "rev_chrg", 327 | "type": "float" 328 | }, 329 | "2007": { 330 | "format": "%9.0g", 331 | "name": "rev_chrg", 332 | "type": "float" 333 | }, 334 | "2008": { 335 | "format": "%9.0g", 336 | "name": "rev_chrg", 337 | "type": "float" 338 | }, 339 | "2009": { 340 | "format": "%10.0g", 341 | "name": "rev_chrg", 342 | "type": "double" 343 | }, 344 | "2010": { 345 | "format": 
"%12.0g", 346 | "name": "rev_chrg", 347 | "type": "double" 348 | }, 349 | "2011": { 350 | "format": "%10.0g", 351 | "name": "rev_chrg", 352 | "type": "double" 353 | }, 354 | "2012": { 355 | "format": "%12.0g", 356 | "name": "rev_chrg", 357 | "type": "double" 358 | }, 359 | "2013": { 360 | "format": "%12.0g", 361 | "name": "rev_chrg", 362 | "type": "double" 363 | }, 364 | "desc": "revenue center total charge amount" 365 | }, 366 | "rev_cntr": { 367 | "2002": { 368 | "format": "%4s", 369 | "name": "rev_cntr", 370 | "type": "str4" 371 | }, 372 | "2003": { 373 | "format": "%4s", 374 | "name": "rev_cntr", 375 | "type": "str4" 376 | }, 377 | "2004": { 378 | "format": "%4s", 379 | "name": "rev_cntr", 380 | "type": "str4" 381 | }, 382 | "2005": { 383 | "format": "%4s", 384 | "name": "rev_cntr", 385 | "type": "str4" 386 | }, 387 | "2006": { 388 | "format": "%4s", 389 | "name": "rev_cntr", 390 | "type": "str4" 391 | }, 392 | "2007": { 393 | "format": "%4s", 394 | "name": "rev_cntr", 395 | "type": "str4" 396 | }, 397 | "2008": { 398 | "format": "%4s", 399 | "name": "rev_cntr", 400 | "type": "str4" 401 | }, 402 | "2009": { 403 | "format": "%4s", 404 | "name": "rev_cntr", 405 | "type": "str4" 406 | }, 407 | "2010": { 408 | "format": "%4s", 409 | "name": "rev_cntr", 410 | "type": "str4" 411 | }, 412 | "2011": { 413 | "format": "%4s", 414 | "name": "rev_cntr", 415 | "type": "str4" 416 | }, 417 | "2012": { 418 | "format": "%4s", 419 | "name": "rev_cntr", 420 | "type": "str4" 421 | }, 422 | "2013": { 423 | "format": "%4s", 424 | "name": "rev_cntr", 425 | "type": "str4" 426 | }, 427 | "desc": "revenue center code" 428 | }, 429 | "rev_cntr_ndc_qty": { 430 | "2010": { 431 | "format": "%8.0g", 432 | "name": "rev_cntr_ndc_qty", 433 | "type": "byte" 434 | }, 435 | "2011": { 436 | "format": "%10.0g", 437 | "name": "rev_cntr_ndc_qty", 438 | "type": "double" 439 | }, 440 | "2012": { 441 | "format": "%8.0g", 442 | "name": "rev_cntr_ndc_qty", 443 | "type": "byte" 444 | }, 445 | "2013": { 446 
| "format": "%8.0g", 447 | "name": "rev_cntr_ndc_qty", 448 | "type": "byte" 449 | }, 450 | "desc": "revenue center ndc quantity" 451 | }, 452 | "rev_cntr_ndc_qty_qlfr_cd": { 453 | "2010": { 454 | "format": "%1s", 455 | "name": "rev_cntr_ndc_qty_qlfr_cd", 456 | "type": "str1" 457 | }, 458 | "2011": { 459 | "format": "%2s", 460 | "name": "rev_cntr_ndc_qty_qlfr_cd", 461 | "type": "str2" 462 | }, 463 | "2012": { 464 | "format": "%1s", 465 | "name": "rev_cntr_ndc_qty_qlfr_cd", 466 | "type": "str1" 467 | }, 468 | "2013": { 469 | "format": "%1s", 470 | "name": "rev_cntr_ndc_qty_qlfr_cd", 471 | "type": "str1" 472 | }, 473 | "desc": "revenue center ndc quantity qualifier code" 474 | }, 475 | "rev_dt": { 476 | "2002": { 477 | "format": "%dD_m_Y", 478 | "name": "srev_dt", 479 | "type": "long" 480 | }, 481 | "2003": { 482 | "format": "%dD_m_Y", 483 | "name": "srev_dt", 484 | "type": "long" 485 | }, 486 | "2004": { 487 | "format": "%dD_m_Y", 488 | "name": "srev_dt", 489 | "type": "long" 490 | }, 491 | "2005": { 492 | "format": "%dD_m_Y", 493 | "name": "srev_dt", 494 | "type": "long" 495 | }, 496 | "desc": "204. 
revenue center date (sas yyyymmdd)" 497 | }, 498 | "rev_ncvr": { 499 | "2002": { 500 | "format": "%9.0g", 501 | "name": "rev_ncvr", 502 | "type": "float" 503 | }, 504 | "2003": { 505 | "format": "%9.0g", 506 | "name": "rev_ncvr", 507 | "type": "float" 508 | }, 509 | "2004": { 510 | "format": "%9.0g", 511 | "name": "rev_ncvr", 512 | "type": "float" 513 | }, 514 | "2005": { 515 | "format": "%9.0g", 516 | "name": "rev_ncvr", 517 | "type": "float" 518 | }, 519 | "2006": { 520 | "format": "%9.0g", 521 | "name": "rev_ncvr", 522 | "type": "float" 523 | }, 524 | "2007": { 525 | "format": "%9.0g", 526 | "name": "rev_ncvr", 527 | "type": "float" 528 | }, 529 | "2008": { 530 | "format": "%9.0g", 531 | "name": "rev_ncvr", 532 | "type": "float" 533 | }, 534 | "2009": { 535 | "format": "%9.0g", 536 | "name": "rev_ncvr", 537 | "type": "float" 538 | }, 539 | "2010": { 540 | "format": "%12.0g", 541 | "name": "rev_ncvr", 542 | "type": "double" 543 | }, 544 | "2011": { 545 | "format": "%10.0g", 546 | "name": "rev_ncvr", 547 | "type": "double" 548 | }, 549 | "2012": { 550 | "format": "%12.0g", 551 | "name": "rev_ncvr", 552 | "type": "double" 553 | }, 554 | "2013": { 555 | "format": "%12.0g", 556 | "name": "rev_ncvr", 557 | "type": "double" 558 | }, 559 | "desc": "revenue center non-covered charge amount" 560 | }, 561 | "rev_rate": { 562 | "2002": { 563 | "format": "%9.0g", 564 | "name": "rev_rate", 565 | "type": "float" 566 | }, 567 | "2003": { 568 | "format": "%9.0g", 569 | "name": "rev_rate", 570 | "type": "float" 571 | }, 572 | "2004": { 573 | "format": "%9.0g", 574 | "name": "rev_rate", 575 | "type": "float" 576 | }, 577 | "2005": { 578 | "format": "%9.0g", 579 | "name": "rev_rate", 580 | "type": "float" 581 | }, 582 | "2006": { 583 | "format": "%9.0g", 584 | "name": "rev_rate", 585 | "type": "float" 586 | }, 587 | "2007": { 588 | "format": "%9.0g", 589 | "name": "rev_rate", 590 | "type": "float" 591 | }, 592 | "2008": { 593 | "format": "%9.0g", 594 | "name": "rev_rate", 595 | 
"type": "float" 596 | }, 597 | "2009": { 598 | "format": "%9.0g", 599 | "name": "rev_rate", 600 | "type": "float" 601 | }, 602 | "2010": { 603 | "format": "%12.0g", 604 | "name": "rev_rate", 605 | "type": "double" 606 | }, 607 | "2011": { 608 | "format": "%10.0g", 609 | "name": "rev_rate", 610 | "type": "double" 611 | }, 612 | "2012": { 613 | "format": "%12.0g", 614 | "name": "rev_rate", 615 | "type": "double" 616 | }, 617 | "2013": { 618 | "format": "%12.0g", 619 | "name": "rev_rate", 620 | "type": "double" 621 | }, 622 | "desc": "revenue center rate amount" 623 | }, 624 | "rev_unit": { 625 | "2002": { 626 | "format": "%8.0g", 627 | "name": "rev_unit", 628 | "type": "int" 629 | }, 630 | "2003": { 631 | "format": "%8.0g", 632 | "name": "rev_unit", 633 | "type": "int" 634 | }, 635 | "2004": { 636 | "format": "%8.0g", 637 | "name": "rev_unit", 638 | "type": "int" 639 | }, 640 | "2005": { 641 | "format": "%8.0g", 642 | "name": "rev_unit", 643 | "type": "int" 644 | }, 645 | "2006": { 646 | "format": "%8.0g", 647 | "name": "rev_unit", 648 | "type": "int" 649 | }, 650 | "2007": { 651 | "format": "%8.0g", 652 | "name": "rev_unit", 653 | "type": "int" 654 | }, 655 | "2008": { 656 | "format": "%8.0g", 657 | "name": "rev_unit", 658 | "type": "int" 659 | }, 660 | "2009": { 661 | "format": "%8.0g", 662 | "name": "rev_unit", 663 | "type": "int" 664 | }, 665 | "2010": { 666 | "format": "%8.0g", 667 | "name": "rev_unit", 668 | "type": "int" 669 | }, 670 | "2011": { 671 | "format": "%12.0g", 672 | "name": "rev_unit", 673 | "type": "long" 674 | }, 675 | "2012": { 676 | "format": "%8.0g", 677 | "name": "rev_unit", 678 | "type": "int" 679 | }, 680 | "2013": { 681 | "format": "%8.0g", 682 | "name": "rev_unit", 683 | "type": "int" 684 | }, 685 | "desc": "revenue center unit count" 686 | }, 687 | "revdedcd": { 688 | "2002": { 689 | "format": "%1s", 690 | "name": "revdedcd", 691 | "type": "str1" 692 | }, 693 | "2003": { 694 | "format": "%1s", 695 | "name": "revdedcd", 696 | "type": 
"str1" 697 | }, 698 | "2004": { 699 | "format": "%1s", 700 | "name": "revdedcd", 701 | "type": "str1" 702 | }, 703 | "2005": { 704 | "format": "%1s", 705 | "name": "revdedcd", 706 | "type": "str1" 707 | }, 708 | "2006": { 709 | "format": "%1s", 710 | "name": "revdedcd", 711 | "type": "str1" 712 | }, 713 | "2007": { 714 | "format": "%1s", 715 | "name": "revdedcd", 716 | "type": "str1" 717 | }, 718 | "2008": { 719 | "format": "%1s", 720 | "name": "revdedcd", 721 | "type": "str1" 722 | }, 723 | "2009": { 724 | "format": "%1s", 725 | "name": "revdedcd", 726 | "type": "str1" 727 | }, 728 | "2010": { 729 | "format": "%1s", 730 | "name": "revdedcd", 731 | "type": "str1" 732 | }, 733 | "2011": { 734 | "format": "%1s", 735 | "name": "revdedcd", 736 | "type": "str1" 737 | }, 738 | "2012": { 739 | "format": "%1s", 740 | "name": "revdedcd", 741 | "type": "str1" 742 | }, 743 | "2013": { 744 | "format": "%1s", 745 | "name": "revdedcd", 746 | "type": "str1" 747 | }, 748 | "desc": "revenue center deductible coinsurance code" 749 | }, 750 | "rndrng_physn_npi": { 751 | "2010": { 752 | "format": "%1s", 753 | "name": "rndrng_physn_npi", 754 | "type": "str1" 755 | }, 756 | "2011": { 757 | "format": "%1s", 758 | "name": "rndrng_physn_npi", 759 | "type": "str1" 760 | }, 761 | "2012": { 762 | "format": "%1s", 763 | "name": "rndrng_physn_npi", 764 | "type": "str1" 765 | }, 766 | "2013": { 767 | "format": "%1s", 768 | "name": "rndrng_physn_npi", 769 | "type": "str1" 770 | }, 771 | "desc": "revenue center rendering physician npi" 772 | }, 773 | "rndrng_physn_upin": { 774 | "2010": { 775 | "format": "%1s", 776 | "name": "rndrng_physn_upin", 777 | "type": "str1" 778 | }, 779 | "2011": { 780 | "format": "%1s", 781 | "name": "rndrng_physn_upin", 782 | "type": "str1" 783 | }, 784 | "2012": { 785 | "format": "%1s", 786 | "name": "rndrng_physn_upin", 787 | "type": "str1" 788 | }, 789 | "2013": { 790 | "format": "%1s", 791 | "name": "rndrng_physn_upin", 792 | "type": "str1" 793 | }, 794 | "desc": 
"revenue center rendering physician upin" 795 | }, 796 | "thru_dt": { 797 | "2006": { 798 | "format": "%dD_m_Y", 799 | "name": "thru_dt", 800 | "type": "long" 801 | }, 802 | "2007": { 803 | "format": "%dD_m_Y", 804 | "name": "thru_dt", 805 | "type": "long" 806 | }, 807 | "2008": { 808 | "format": "%dD_m_Y", 809 | "name": "thru_dt", 810 | "type": "long" 811 | }, 812 | "2009": { 813 | "format": "%d", 814 | "name": "thru_dt", 815 | "type": "long" 816 | }, 817 | "2010": { 818 | "format": "%tdD_m_Y", 819 | "name": "thru_dt", 820 | "type": "long" 821 | }, 822 | "2011": { 823 | "format": "%tdD_m_Y", 824 | "name": "thru_dt", 825 | "type": "long" 826 | }, 827 | "2012": { 828 | "format": "%tdD_m_Y", 829 | "name": "thru_dt", 830 | "type": "long" 831 | }, 832 | "2013": { 833 | "format": "%tdD_m_Y", 834 | "name": "thru_dt", 835 | "type": "long" 836 | }, 837 | "desc": "claim through date (determines year of claim)" 838 | } 839 | } -------------------------------------------------------------------------------- /medicare_utils/metadata/codebook/bsfab.json: -------------------------------------------------------------------------------- 1 | { 2 | "enrl_src": { 3 | "name": "Source of enrollment data", 4 | "values": { 5 | "EDB": "Enrollment Database", 6 | "CME": "Common Medicare Environment" 7 | } 8 | }, 9 | "sample_group": { 10 | "name": "Medicare Sample Group Indicator", 11 | "values": {} 12 | }, 13 | "efivepct": { 14 | "name": "Enhanced Medicare 5% Sample Indicator", 15 | "values": { 16 | "Y": "Yes, included in enhanced 5% sample", 17 | "NULL": "Not included in enhanced 5% sample" 18 | } 19 | }, 20 | "crnt_bic": { 21 | "name": "Current Beneficiary Identification Code", 22 | "values": {} 23 | }, 24 | "state_cd": { 25 | "name": "State code for beneficiary (SSA code)", 26 | "values": { 27 | "01": "Alabama", 28 | "02": "Alaska", 29 | "03": "Arizona", 30 | "04": "Arkansas", 31 | "05": "California", 32 | "06": "Colorado", 33 | "07": "Connecticut", 34 | "08": "Delaware", 35 | "09": 
"District of Columbia", 36 | "10": "Florida", 37 | "11": "Georgia", 38 | "12": "Hawaii", 39 | "13": "Idaho", 40 | "14": "Illinois", 41 | "15": "Indiana", 42 | "16": "Iowa", 43 | "17": "Kansas", 44 | "18": "Kentucky", 45 | "19": "Louisiana", 46 | "20": "Maine", 47 | "21": "Maryland", 48 | "22": "Massachusetts", 49 | "23": "Michigan", 50 | "24": "Minnesota", 51 | "25": "Mississippi", 52 | "26": "Missouri", 53 | "27": "Montana", 54 | "28": "Nebraska", 55 | "29": "Nevada", 56 | "30": "New Hampshire", 57 | "31": "New Jersey", 58 | "32": "New Mexico", 59 | "33": "New York", 60 | "34": "North Carolina", 61 | "35": "North Dakota", 62 | "36": "Ohio", 63 | "37": "Oklahoma", 64 | "38": "Oregon", 65 | "39": "Pennsylvania", 66 | "40": "Puerto Rico", 67 | "41": "Rhode Island", 68 | "42": "South Carolina", 69 | "43": "South Dakota", 70 | "44": "Tennessee", 71 | "45": "Texas", 72 | "46": "Utah", 73 | "47": "Vermont", 74 | "48": "Virgin Islands", 75 | "49": "Virginia", 76 | "50": "Washington", 77 | "51": "West Virginia", 78 | "52": "Wisconsin", 79 | "53": "Wyoming", 80 | "54": "Africa", 81 | "55": "California", 82 | "56": "Canada & Islands", 83 | "57": "Central America and West Indies", 84 | "58": "Europe", 85 | "59": "Mexico", 86 | "60": "Oceania", 87 | "61": "Philippines", 88 | "62": "South America", 89 | "63": "U.S. 
Possessions", 90 | "64": "American Samoa", 91 | "65": "Guam", 92 | "66": "Commonwealth of the Northern Marianas Islands", 93 | "67": "Texas", 94 | "68": "Florida", 95 | "69": "Florida", 96 | "70": "Kansas", 97 | "71": "Louisiana", 98 | "72": "Ohio", 99 | "73": "Pennsylvania", 100 | "74": "Texas", 101 | "80": "Maryland", 102 | "97": "Northern Marianas", 103 | "98": "Guam", 104 | "99": "With 000 county code is American Samoa; otherwise unknown" 105 | } 106 | }, 107 | "v_dod_sw": { 108 | "name": "Valid Date of Death Switch", 109 | "values": { 110 | "Null": "Default", 111 | "V": "Valid death date" 112 | } 113 | }, 114 | "sex": { 115 | "name": "Sex", 116 | "values": { 117 | "0": "Unknown", 118 | "1": "Male", 119 | "2": "Female" 120 | } 121 | }, 122 | "race": { 123 | "name": "Beneficiary Race Code", 124 | "values": { 125 | "0": "Unknown", 126 | "1": "White", 127 | "2": "Black", 128 | "3": "Other", 129 | "4": "Asian", 130 | "5": "Hispanic", 131 | "6": "North American Native" 132 | } 133 | }, 134 | "rti_race_cd": { 135 | "name": "Research Triangle Institute (RTI) Race Code", 136 | "values": { 137 | "0": "Unknown", 138 | "1": "Non-Hispanic White", 139 | "2": "Black (or African-American)", 140 | "3": "Other", 141 | "4": "Asian Pacific Islander", 142 | "5": "Hispanic", 143 | "6": "American Indian Alaska Native" 144 | } 145 | }, 146 | "orec": { 147 | "name": "Original Reason for Entitlement Code", 148 | "values": { 149 | "0": "Old Age and Survivors Insurance (OASI)", 150 | "1": "Disability Insurance Benefits (DIB)", 151 | "2": "End-stage Renal Disease (ESRD)", 152 | "3": "Both DIB and ESRD" 153 | } 154 | }, 155 | "crec": { 156 | "name": "Current Reason for Entitlement Code", 157 | "values": { 158 | "0": "Old Age and Survivors Insurance (OASI)", 159 | "1": "Disability Insurance Benefits (DIB)", 160 | "2": "End-stage Renal Disease (ESRD)", 161 | "3": "Both DIB and ESRD" 162 | } 163 | }, 164 | "esrd_ind": { 165 | "name": "End-stage Renal Disease (ESRD) Indicator", 166 | "values": 
{ 167 | "Y": "The beneficiary has ESRD", 168 | "0": "The beneficiary does not have ESRD" 169 | } 170 | }, 171 | "mdcr_stus_cd_01": { 172 | "name": "Medicare Status Code - January", 173 | "values": { 174 | "10": "Aged without ESRD", 175 | "11": "Aged with ESRD", 176 | "20": "Disabled without ESRD", 177 | "21": "Disabled with ESRD", 178 | "31": "ESRD only" 179 | } 180 | }, 181 | "mdcr_stus_cd_02": { 182 | "name": "Medicare Status Code - February", 183 | "values": { 184 | "10": "Aged without ESRD", 185 | "11": "Aged with ESRD", 186 | "20": "Disabled without ESRD", 187 | "21": "Disabled with ESRD", 188 | "31": "ESRD only" 189 | } 190 | }, 191 | "mdcr_stus_cd_03": { 192 | "name": "Medicare Status Code - March", 193 | "values": { 194 | "10": "Aged without ESRD", 195 | "11": "Aged with ESRD", 196 | "20": "Disabled without ESRD", 197 | "21": "Disabled with ESRD", 198 | "31": "ESRD only" 199 | } 200 | }, 201 | "mdcr_stus_cd_04": { 202 | "name": "Medicare Status Code - April", 203 | "values": { 204 | "10": "Aged without ESRD", 205 | "11": "Aged with ESRD", 206 | "20": "Disabled without ESRD", 207 | "21": "Disabled with ESRD", 208 | "31": "ESRD only" 209 | } 210 | }, 211 | "mdcr_stus_cd_05": { 212 | "name": "Medicare Status Code - May", 213 | "values": { 214 | "10": "Aged without ESRD", 215 | "11": "Aged with ESRD", 216 | "20": "Disabled without ESRD", 217 | "21": "Disabled with ESRD", 218 | "31": "ESRD only" 219 | } 220 | }, 221 | "mdcr_stus_cd_06": { 222 | "name": "Medicare Status Code - June", 223 | "values": { 224 | "10": "Aged without ESRD", 225 | "11": "Aged with ESRD", 226 | "20": "Disabled without ESRD", 227 | "21": "Disabled with ESRD", 228 | "31": "ESRD only" 229 | } 230 | }, 231 | "mdcr_stus_cd_07": { 232 | "name": "Medicare Status Code - July", 233 | "values": { 234 | "10": "Aged without ESRD", 235 | "11": "Aged with ESRD", 236 | "20": "Disabled without ESRD", 237 | "21": "Disabled with ESRD", 238 | "31": "ESRD only" 239 | } 240 | }, 241 | "mdcr_stus_cd_08": { 242 
| "name": "Medicare Status Code - August", 243 | "values": { 244 | "10": "Aged without ESRD", 245 | "11": "Aged with ESRD", 246 | "20": "Disabled without ESRD", 247 | "21": "Disabled with ESRD", 248 | "31": "ESRD only" 249 | } 250 | }, 251 | "mdcr_stus_cd_09": { 252 | "name": "Medicare Status Code - September", 253 | "values": { 254 | "10": "Aged without ESRD", 255 | "11": "Aged with ESRD", 256 | "20": "Disabled without ESRD", 257 | "21": "Disabled with ESRD", 258 | "31": "ESRD only" 259 | } 260 | }, 261 | "mdcr_stus_cd_10": { 262 | "name": "Medicare Status Code - October", 263 | "values": { 264 | "10": "Aged without ESRD", 265 | "11": "Aged with ESRD", 266 | "20": "Disabled without ESRD", 267 | "21": "Disabled with ESRD", 268 | "31": "ESRD only" 269 | } 270 | }, 271 | "mdcr_stus_cd_11": { 272 | "name": "Medicare Status Code - November", 273 | "values": { 274 | "10": "Aged without ESRD", 275 | "11": "Aged with ESRD", 276 | "20": "Disabled without ESRD", 277 | "21": "Disabled with ESRD", 278 | "31": "ESRD only" 279 | } 280 | }, 281 | "mdcr_stus_cd_12": { 282 | "name": "Medicare Status Code - December", 283 | "values": { 284 | "10": "Aged without ESRD", 285 | "11": "Aged with ESRD", 286 | "20": "Disabled without ESRD", 287 | "21": "Disabled with ESRD", 288 | "31": "ESRD only" 289 | } 290 | }, 291 | "a_trm_cd": { 292 | "name": "Part A Termination Code", 293 | "values": { 294 | "0": "Not terminated", 295 | "1": "Dead", 296 | "2": "Non-payment of premium", 297 | "3": "Voluntary withdrawl", 298 | "9": "Other termination" 299 | } 300 | }, 301 | "b_trm_cd": { 302 | "name": "Part B Termination Code", 303 | "values": { 304 | "0": "Not terminated", 305 | "1": "Dead", 306 | "2": "Non-payment of premium", 307 | "3": "Voluntary withdrawl", 308 | "9": "Other termination" 309 | } 310 | }, 311 | "buyin01": { 312 | "name": "Medicare Entitlement/Buy-In Indicator - January", 313 | "values": { 314 | "0": "Not entitled", 315 | "1": "Part A only", 316 | "2": "Part B only", 317 | "3": 
"Part A and Part B", 318 | "A": "Part A state buy-in", 319 | "B": "Part B state buy-in", 320 | "C": "Part A and Part B state buy-in" 321 | } 322 | }, 323 | "buyin02": { 324 | "name": "Medicare Entitlement/Buy-In Indicator - February", 325 | "values": { 326 | "0": "Not entitled", 327 | "1": "Part A only", 328 | "2": "Part B only", 329 | "3": "Part A and Part B", 330 | "A": "Part A state buy-in", 331 | "B": "Part B state buy-in", 332 | "C": "Part A and Part B state buy-in" 333 | } 334 | }, 335 | "buyin03": { 336 | "name": "Medicare Entitlement/Buy-In Indicator - March", 337 | "values": { 338 | "0": "Not entitled", 339 | "1": "Part A only", 340 | "2": "Part B only", 341 | "3": "Part A and Part B", 342 | "A": "Part A state buy-in", 343 | "B": "Part B state buy-in", 344 | "C": "Part A and Part B state buy-in" 345 | } 346 | }, 347 | "buyin04": { 348 | "name": "Medicare Entitlement/Buy-In Indicator - April", 349 | "values": { 350 | "0": "Not entitled", 351 | "1": "Part A only", 352 | "2": "Part B only", 353 | "3": "Part A and Part B", 354 | "A": "Part A state buy-in", 355 | "B": "Part B state buy-in", 356 | "C": "Part A and Part B state buy-in" 357 | } 358 | }, 359 | "buyin05": { 360 | "name": "Medicare Entitlement/Buy-In Indicator - May", 361 | "values": { 362 | "0": "Not entitled", 363 | "1": "Part A only", 364 | "2": "Part B only", 365 | "3": "Part A and Part B", 366 | "A": "Part A state buy-in", 367 | "B": "Part B state buy-in", 368 | "C": "Part A and Part B state buy-in" 369 | } 370 | }, 371 | "buyin06": { 372 | "name": "Medicare Entitlement/Buy-In Indicator - June", 373 | "values": { 374 | "0": "Not entitled", 375 | "1": "Part A only", 376 | "2": "Part B only", 377 | "3": "Part A and Part B", 378 | "A": "Part A state buy-in", 379 | "B": "Part B state buy-in", 380 | "C": "Part A and Part B state buy-in" 381 | } 382 | }, 383 | "buyin07": { 384 | "name": "Medicare Entitlement/Buy-In Indicator - July", 385 | "values": { 386 | "0": "Not entitled", 387 | "1": "Part A 
only", 388 | "2": "Part B only", 389 | "3": "Part A and Part B", 390 | "A": "Part A state buy-in", 391 | "B": "Part B state buy-in", 392 | "C": "Part A and Part B state buy-in" 393 | } 394 | }, 395 | "buyin08": { 396 | "name": "Medicare Entitlement/Buy-In Indicator - August", 397 | "values": { 398 | "0": "Not entitled", 399 | "1": "Part A only", 400 | "2": "Part B only", 401 | "3": "Part A and Part B", 402 | "A": "Part A state buy-in", 403 | "B": "Part B state buy-in", 404 | "C": "Part A and Part B state buy-in" 405 | } 406 | }, 407 | "buyin09": { 408 | "name": "Medicare Entitlement/Buy-In Indicator - September", 409 | "values": { 410 | "0": "Not entitled", 411 | "1": "Part A only", 412 | "2": "Part B only", 413 | "3": "Part A and Part B", 414 | "A": "Part A state buy-in", 415 | "B": "Part B state buy-in", 416 | "C": "Part A and Part B state buy-in" 417 | } 418 | }, 419 | "buyin10": { 420 | "name": "Medicare Entitlement/Buy-In Indicator - October", 421 | "values": { 422 | "0": "Not entitled", 423 | "1": "Part A only", 424 | "2": "Part B only", 425 | "3": "Part A and Part B", 426 | "A": "Part A state buy-in", 427 | "B": "Part B state buy-in", 428 | "C": "Part A and Part B state buy-in" 429 | } 430 | }, 431 | "buyin11": { 432 | "name": "Medicare Entitlement/Buy-In Indicator - November", 433 | "values": { 434 | "0": "Not entitled", 435 | "1": "Part A only", 436 | "2": "Part B only", 437 | "3": "Part A and Part B", 438 | "A": "Part A state buy-in", 439 | "B": "Part B state buy-in", 440 | "C": "Part A and Part B state buy-in" 441 | } 442 | }, 443 | "buyin12": { 444 | "name": "Medicare Entitlement/Buy-In Indicator - December", 445 | "values": { 446 | "0": "Not entitled", 447 | "1": "Part A only", 448 | "2": "Part B only", 449 | "3": "Part A and Part B", 450 | "A": "Part A state buy-in", 451 | "B": "Part B state buy-in", 452 | "C": "Part A and Part B state buy-in" 453 | } 454 | }, 455 | "hmoind01": { 456 | "name": "HMO Indicator - January", 457 | "values": { 458 | "0": 
"Not a member of an HMO", 459 | "1": "Non-lock-in, CMS to process provider claims", 460 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 461 | "4": "Fee-for-service participant in case or disease management demonstration project", 462 | "5": "Not in documentation", 463 | "A": "Lock-in, CMS to process provider claims", 464 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 465 | "C": "Lock-in, GHO to process all provider claims" 466 | } 467 | }, 468 | "hmoind02": { 469 | "name": "HMO Indicator - February", 470 | "values": { 471 | "0": "Not a member of an HMO", 472 | "1": "Non-lock-in, CMS to process provider claims", 473 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 474 | "4": "Fee-for-service participant in case or disease management demonstration project", 475 | "5": "Not in documentation", 476 | "A": "Lock-in, CMS to process provider claims", 477 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 478 | "C": "Lock-in, GHO to process all provider claims" 479 | } 480 | }, 481 | "hmoind03": { 482 | "name": "HMO Indicator - March", 483 | "values": { 484 | "0": "Not a member of an HMO", 485 | "1": "Non-lock-in, CMS to process provider claims", 486 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 487 | "4": "Fee-for-service participant in case or disease management demonstration project", 488 | "5": "Not in documentation", 489 | "A": "Lock-in, CMS to process provider claims", 490 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 491 | "C": "Lock-in, GHO to process all provider claims" 492 | } 493 | }, 494 | "hmoind04": { 495 | "name": "HMO Indicator - April", 496 | "values": { 497 | "0": "Not a member of an HMO", 498 | "1": "Non-lock-in, CMS to process provider claims", 499 | "2": "Non-lock-in, 
group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 500 | "4": "Fee-for-service participant in case or disease management demonstration project", 501 | "5": "Not in documentation", 502 | "A": "Lock-in, CMS to process provider claims", 503 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 504 | "C": "Lock-in, GHO to process all provider claims" 505 | } 506 | }, 507 | "hmoind05": { 508 | "name": "HMO Indicator - May", 509 | "values": { 510 | "0": "Not a member of an HMO", 511 | "1": "Non-lock-in, CMS to process provider claims", 512 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 513 | "4": "Fee-for-service participant in case or disease management demonstration project", 514 | "5": "Not in documentation", 515 | "A": "Lock-in, CMS to process provider claims", 516 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 517 | "C": "Lock-in, GHO to process all provider claims" 518 | } 519 | }, 520 | "hmoind06": { 521 | "name": "HMO Indicator - June", 522 | "values": { 523 | "0": "Not a member of an HMO", 524 | "1": "Non-lock-in, CMS to process provider claims", 525 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 526 | "4": "Fee-for-service participant in case or disease management demonstration project", 527 | "5": "Not in documentation", 528 | "A": "Lock-in, CMS to process provider claims", 529 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 530 | "C": "Lock-in, GHO to process all provider claims" 531 | } 532 | }, 533 | "hmoind07": { 534 | "name": "HMO Indicator - July", 535 | "values": { 536 | "0": "Not a member of an HMO", 537 | "1": "Non-lock-in, CMS to process provider claims", 538 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 539 | "4": 
"Fee-for-service participant in case or disease management demonstration project", 540 | "5": "Not in documentation", 541 | "A": "Lock-in, CMS to process provider claims", 542 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 543 | "C": "Lock-in, GHO to process all provider claims" 544 | } 545 | }, 546 | "hmoind08": { 547 | "name": "HMO Indicator - August", 548 | "values": { 549 | "0": "Not a member of an HMO", 550 | "1": "Non-lock-in, CMS to process provider claims", 551 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 552 | "4": "Fee-for-service participant in case or disease management demonstration project", 553 | "5": "Not in documentation", 554 | "A": "Lock-in, CMS to process provider claims", 555 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 556 | "C": "Lock-in, GHO to process all provider claims" 557 | } 558 | }, 559 | "hmoind09": { 560 | "name": "HMO Indicator - September", 561 | "values": { 562 | "0": "Not a member of an HMO", 563 | "1": "Non-lock-in, CMS to process provider claims", 564 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 565 | "4": "Fee-for-service participant in case or disease management demonstration project", 566 | "5": "Not in documentation", 567 | "A": "Lock-in, CMS to process provider claims", 568 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 569 | "C": "Lock-in, GHO to process all provider claims" 570 | } 571 | }, 572 | "hmoind10": { 573 | "name": "HMO Indicator - October", 574 | "values": { 575 | "0": "Not a member of an HMO", 576 | "1": "Non-lock-in, CMS to process provider claims", 577 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 578 | "4": "Fee-for-service participant in case or disease management demonstration project", 579 | "5": "Not in 
documentation", 580 | "A": "Lock-in, CMS to process provider claims", 581 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 582 | "C": "Lock-in, GHO to process all provider claims" 583 | } 584 | }, 585 | "hmoind11": { 586 | "name": "HMO Indicator - November", 587 | "values": { 588 | "0": "Not a member of an HMO", 589 | "1": "Non-lock-in, CMS to process provider claims", 590 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 591 | "4": "Fee-for-service participant in case or disease management demonstration project", 592 | "5": "Not in documentation", 593 | "A": "Lock-in, CMS to process provider claims", 594 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 595 | "C": "Lock-in, GHO to process all provider claims" 596 | } 597 | }, 598 | "hmoind12": { 599 | "name": "HMO Indicator - December", 600 | "values": { 601 | "0": "Not a member of an HMO", 602 | "1": "Non-lock-in, CMS to process provider claims", 603 | "2": "Non-lock-in, group health organization (GHO; MA plan) to process in plan Part A and in area Part B claims", 604 | "4": "Fee-for-service participant in case or disease management demonstration project", 605 | "5": "Not in documentation", 606 | "A": "Lock-in, CMS to process provider claims", 607 | "B": "Lock-in, GHO to process in plan Part A and in area Part B claims", 608 | "C": "Lock-in, GHO to process all provider claims" 609 | } 610 | } 611 | } -------------------------------------------------------------------------------- /medicare_utils/metadata/codebook/med.json: -------------------------------------------------------------------------------- 1 | { 2 | "clm_type": { 3 | "name": "MEDPAR NCH Claim Type Code", 4 | "values": { 5 | "10": "HHA claim", 6 | "20": "Non swing bed SNF claim", 7 | "30": "Swing bed SNF claim", 8 | "40": "Outpatient claim", 9 | "50": "Hospice claim", 10 | "60": "Inpatient claim", 11 | "61": "Inpatient 'Full-Encounter' 
claim", 12 | "62": "Medicare Advantage IME/GME claims", 13 | "63": "Medicare Advantage (no-pay) claims", 14 | "64": "Medicare Advantage (paid as FFS) claim", 15 | "71": "RIC O local carrier non-DMEPOS claim", 16 | "72": "RIC O local carrier DMEPOS claim", 17 | "81": "RIC M DMERC non-DMEPOS claim", 18 | "82": "RIC M DMERC DMEPOS claim" 19 | } 20 | }, 21 | "sex": { 22 | "name": "MEDPAR Beneficiary Sex Code", 23 | "values": { 24 | "0": "Unknown", 25 | "2": "Female", 26 | "1": "Male" 27 | } 28 | }, 29 | "race": { 30 | "name": "MEDPAR Beneficiary Race Code", 31 | "values": { 32 | "1": "White", 33 | "2": "Black", 34 | "3": "Other", 35 | "4": "Asian", 36 | "5": "Hispanic", 37 | "6": "North American Native", 38 | "0": "Unknown" 39 | } 40 | }, 41 | "ms_cd": { 42 | "name": "MEDPAR Beneficiary Medicare Status Code", 43 | "values": { 44 | "10": "Aged without ESRD", 45 | "11": "Aged with ESRD", 46 | "20": "Disabled without ESRD", 47 | "21": "Disabled with ESRD", 48 | "31": "ESRD only" 49 | } 50 | }, 51 | "state_cd": { 52 | "name": "MEDPAR Beneficiary Residence SSA Standard State Code", 53 | "values": { 54 | "01": "Alabama", 55 | "02": "Alaska", 56 | "03": "Arizona", 57 | "04": "Arkansas", 58 | "05": "California", 59 | "06": "Colorado", 60 | "07": "Connecticut", 61 | "08": "Delaware", 62 | "09": "District of Columbia", 63 | "10": "Florida", 64 | "11": "Georgia", 65 | "12": "Hawaii", 66 | "13": "Idaho", 67 | "14": "Illinois", 68 | "15": "Indiana", 69 | "16": "Iowa", 70 | "17": "Kansas", 71 | "18": "Kentucky", 72 | "19": "Louisiana", 73 | "20": "Maine", 74 | "21": "Maryland", 75 | "22": "Massachusetts", 76 | "23": "Michigan", 77 | "24": "Minnesota", 78 | "25": "Mississippi", 79 | "26": "Missouri", 80 | "27": "Montana", 81 | "28": "Nebraska", 82 | "29": "Nevada", 83 | "30": "New Hampshire", 84 | "31": "New Jersey", 85 | "32": "New Mexico", 86 | "33": "New York", 87 | "34": "North Carolina", 88 | "35": "North Dakota", 89 | "36": "Ohio", 90 | "37": "Oklahoma", 91 | "38": "Oregon", 92 
| "39": "Pennsylvania", 93 | "40": "Puerto Rico", 94 | "41": "Rhode Island", 95 | "42": "South Carolina", 96 | "43": "South Dakota", 97 | "44": "Tennesee", 98 | "45": "Texas", 99 | "46": "Utah", 100 | "47": "Vermont", 101 | "48": "Virgin Islands", 102 | "49": "Virginia", 103 | "50": "Washington", 104 | "51": "West Virginia", 105 | "52": "Wisconsin", 106 | "53": "Wyoming", 107 | "54": "Africa", 108 | "55": "Asia", 109 | "56": "Canada", 110 | "57": "Central America & West Indies", 111 | "58": "Europe", 112 | "59": "Mexico", 113 | "60": "Oceania", 114 | "61": "Philippines", 115 | "62": "South America", 116 | "63": "U.S. Possessions", 117 | "97": "Saipan - MP", 118 | "98": "Guam", 119 | "99": "American Samoa" 120 | } 121 | }, 122 | "admsnday": { 123 | "name": "MEDPAR Admission Day Code", 124 | "values": { 125 | "1": "Sunday", 126 | "2": "Monday", 127 | "3": "Tuesday", 128 | "4": "Wednesday", 129 | "5": "Thursday", 130 | "6": "Friday", 131 | "7": "Saturday" 132 | } 133 | }, 134 | "dschrgcd": { 135 | "name": "MEDPAR Beneficiary Discharge Status Code", 136 | "values": { 137 | "A": "Discharged alive", 138 | "B": "Discharged dead", 139 | "C": "Still a patient" 140 | } 141 | }, 142 | "ghopdcd": { 143 | "name": "MEDPAR GHO Paid Code", 144 | "values": { 145 | "1": "GHO has paid the provider", 146 | "0": "GHO has not paid the provider", 147 | "": "GHO has not paid the provider" 148 | } 149 | }, 150 | "pps_ind": { 151 | "name": "MEDPAR PPS Indicator Code", 152 | "values": { 153 | "0": "Non PPS", 154 | "2": "PPS" 155 | } 156 | }, 157 | "prvdrnum": { 158 | "name": "MEDPAR Provider Number", 159 | "values": {} 160 | }, 161 | "spclunit": { 162 | "name": "MEDPAR Provider Number Special Unit Code", 163 | "values": { 164 | "M": "PPS-exempt psychiatric unit in CAH", 165 | "R": "PPS-exempt rehabilitation unit in CAH", 166 | "S": "PPS-exempt psychiatric unit", 167 | "T": "PPS-exempt rehabilitation unit", 168 | "U": "Swing-bed short-term/acute care hospital", 169 | "W": "Swing-bed long-term 
hospital", 170 | "Y": "Swing-bed rehabilitation hospital", 171 | "Z": "Swing-bed rural primary care hospital; eff 10/97 changed to critical access hospitals", 172 | "": "Not PPS-exempt or swing-bed designation" 173 | } 174 | }, 175 | "sslssnf": { 176 | "name": "MEDPAR Short Stay/Long Stay/SNF Indicator Code", 177 | "values": { 178 | "N": "SNF Stay", 179 | "S": "Short-Stay", 180 | "L": "Long-Stay" 181 | } 182 | }, 183 | "actv_xref_ind": { 184 | "name": "MEDPAR Active Cross Reference Indicator", 185 | "values": { 186 | "X": "Cross-Reference", 187 | "A": "Active" 188 | } 189 | }, 190 | "icuindcd": { 191 | "name": "MEDPAR Intensive Care Unit (ICU) Indicator Code", 192 | "values": { 193 | "0": "General", 194 | "1": "Surgical", 195 | "2": "Medical", 196 | "3": "Pediatric", 197 | "4": "Psychiatric", 198 | "6": "Intermediate IOU", 199 | "7": "Burn care", 200 | "8": "Trauma", 201 | "9": "Other intensive care" 202 | } 203 | }, 204 | "crnry_cd": { 205 | "name": "MEDPAR Coronary Care Indicator Code", 206 | "values": { 207 | "0": "General", 208 | "1": "Myocardial", 209 | "2": "Pulmonary care", 210 | "3": "Heart transplant", 211 | "4": "Intermediate CCU", 212 | "9": "Other Coronary Care", 213 | "": "No coronary care indication" 214 | } 215 | }, 216 | "phrmcycd": { 217 | "name": "MEDPAR Pharmacy Indicator Code", 218 | "values": { 219 | "0": "No drugs", 220 | "1": "General drugs and/pr IV therapy", 221 | "2": "Erythropoietin", 222 | "3": "Blood clotting drugs", 223 | "4": "General drugs and/or IV therapy; and epoetin", 224 | "5": "General drugs and/or IV therapy; and blood clotting drugs" 225 | } 226 | }, 227 | "trnsplnt": { 228 | "name": "MEDPAR Transplant Indicator Code", 229 | "values": { 230 | "0": "No organ or kidney transplant", 231 | "2": "Organ transplant other than kidney", 232 | "7": "Kidney transplant" 233 | } 234 | }, 235 | "onclgysw": { 236 | "name": "MEDPAR Radiology Oncology Indicator Switch", 237 | "values": { 238 | "0": "No radiology-oncology", 239 | "1": "Yes 
radiology-oncology" 240 | } 241 | }, 242 | "dgnstcsw": { 243 | "name": "MEDPAR Radiology Diagnostic Indicator Switch", 244 | "values": { 245 | "0": "No radiology-diagnostic", 246 | "1": "Yes radiology-diagnostic" 247 | } 248 | }, 249 | "thrptcsw": { 250 | "name": "MEDPAR Radiology Therapeutic Indicator Switch", 251 | "values": { 252 | "0": "No radiology-therapeutic", 253 | "1": "Yes radiology-therapeutic" 254 | } 255 | }, 256 | "nuclr_sw": { 257 | "name": "MEDPAR Radiology Nuclear Medicine Indicator Switch", 258 | "values": { 259 | "0": "No nuclear medicine", 260 | "1": "Yes nuclear medicine" 261 | } 262 | }, 263 | "ctscansw": { 264 | "name": "MEDPAR Radiology CT Scan Indicator Switch", 265 | "values": { 266 | "0": "No radiology CT scan", 267 | "1": "Yes radiology CT scan" 268 | } 269 | }, 270 | "imgng_sw": { 271 | "name": "MEDPAR Radiology Other Imaging Indicator Switch", 272 | "values": { 273 | "0": "No other imaging services", 274 | "1": "Yes other imaging services" 275 | } 276 | }, 277 | "opsrvccd": { 278 | "name": "MEDPAR Outpatient Services Indicator Code", 279 | "values": { 280 | "0": "No outpatient services/ambulatory surgical care", 281 | "1": "Outpatient services", 282 | "2": "Ambulatory surgical care", 283 | "3": "Outpatient services and ambulatory surgical care" 284 | } 285 | }, 286 | "orgncd": { 287 | "name": "MEDPAR Organ Acquisition Indicator Code", 288 | "values": { 289 | "K1": "General classification", 290 | "K2": "Living donor kidney", 291 | "K3": "Cadaver donor kidney", 292 | "K4": "Unknown donor kidney", 293 | "K5": "Other kidney acquisition", 294 | "H1": "Cadaver donor heart", 295 | "H2": "Other heart acquisition", 296 | "L1": "Donor liver", 297 | "01": "Other organ acquisition", 298 | "02": "General acquisition", 299 | "B1": "Bone donor bank", 300 | "03": "Organ donor bank other than kidney", 301 | "S1": "Skin donor bank", 302 | "04": "Other donor bank", 303 | "": "No organ acquisition indication" 304 | } 305 | }, 306 | "esrdstg{x}": { 307 | 
"name": "MEDPAR ESRD Setting Indicator Code", 308 | "values": { 309 | "00": "Ip renal dialysis-general", 310 | "01": "Ip renal dialysis-hemodialysis", 311 | "02": "Ip renal dialysis-peritoneal", 312 | "03": "Ip renal dialysis-capd", 313 | "04": "Ip renal dialysis-ccpd", 314 | "09": "Ip renal dialysis-other", 315 | "20": "Hemodialysis-op-general", 316 | "21": "Hemodialysis-op-hemodialysis/composite", 317 | "22": "Hemodialysis-op-home supplies", 318 | "23": "Hemodialysis-op-home equipment", 319 | "24": "Hemodialysis-op-maintenance/100%", 320 | "25": "Hemodialysis-op-support services", 321 | "29": "Hemodialysis-op-other", 322 | "30": "Peritoneal-op/home-general", 323 | "31": "Peritoneal-op/home-peritoneal/composite", 324 | "32": "Peritoneal-op/home-home supplies", 325 | "33": "Peritoneal-op/home-home equipment", 326 | "34": "Peritoneal-op/home-maintenance/100%", 327 | "35": "Peritoneal-op/home-support services", 328 | "39": "Peritoneal-op/home-other", 329 | "40": "Capd-op-capd/general", 330 | "41": "Capd-op-capd/composite", 331 | "42": "Capd-op-home supplies", 332 | "43": "Capd-op-home equipment", 333 | "44": "Capd-op-maintenance/100%", 334 | "45": "Capd-op-support services", 335 | "49": "Capd-op-other", 336 | "50": "Ccpd-op-ccpd/general", 337 | "51": "Ccpd-op-ccpd/composite", 338 | "52": "Ccpd-op-home supplies", 339 | "53": "Ccpd-op-home equipment", 340 | "54": "Ccpd-op-maintenance/100%", 341 | "55": "Ccpd-op-support services", 342 | "59": "Ccpd-op-other", 343 | "80": "Miscellaneous dialysis-general", 344 | "81": "Miscellaneous dialysis-ultrafiltration", 345 | "89": "Miscellaneous dialysis-other", 346 | "": "No ESRD setting indication" 347 | } 348 | }, 349 | "poa_dgns_e_{x}_ind_cd": { 350 | "name": "MEDPAR Diagnosis E Code Present on Admission Indicator", 351 | "values": { 352 | "Y": "Diagnosis was present at the time of inpatient admission. 
CMS will pay the CC/MCC DRG for those selected HACs that are coded as 'Y' for the POA Indicator.", 353 | "N": "Diagnosis was not present at the time of inpatient admission. CMS will not pay the CC/MCC DRG for those selected HACs that are coded as 'N' for the POA Indicator.", 354 | "U": "Documentation is insufficient to determine if the condition was present at the time of inpatient admission. CMS will not pay the CC/MCC DRG for those selected HACs that are coded as 'U' for the POA Indicator.", 355 | "W": "Clinically undetermined. Provider is unable to clinically determine whether condition was present at the time of inpatient admission. CMS will pay the CC/MCC DRG for those selected HACs that are coded as 'W' for the POA Indicator.", 356 | "1": "Unreported/not used -- exempt from POA reporting -- This code is equivalent to a blank on the UB-04, however, it was determined that blanks are undesirable when submitting this data via the 4010A. CMS will not pay the CC/MCC DRG for those selected HACs that are coded as '1' for the POA Indicator. The '1' POA Indicator should not be applied to any codes on the HAC list.", 357 | "Z": "Denotes the end of the POA indicators (terminated 1/2011).", 358 | "X": "Denotes the end of the POA indicators in special data processing situations that may be identified by CMS in the future (terminated 1/2011).", 359 | "": "Identifies diagnosis codes that are exempt from the POA reporting requirements (replaces the '1'). NOTE: NCH/NMUD will carry a '0' in place of a blank." 360 | } 361 | }, 362 | "poa_dgns_{x}_ind_cd": { 363 | "name": "MEDPAR Diagnosis Present on Admission Indicator Code", 364 | "values": { 365 | "Y": "Diagnosis was present at the time of inpatient admission. CMS will pay the CC/MCC DRG for those selected HACs that are coded as 'Y' for the POA Indicator.", 366 | "N": "Diagnosis was not present at the time of inpatient admission. 
CMS will not pay the CC/MCC DRG for those selected HACs that are coded as 'N' for the POA Indicator.", 367 | "U": "Documentation is insufficient to determine if the condition was present at the time of inpatient admission. CMS will not pay the CC/MCC DRG for those selected HACs that are coded as 'U' for the POA Indicator.", 368 | "W": "Clinically undetermined. Provider is unable to clinically determine whether condition was present at the time of inpatient admission. CMS will pay the CC/MCC DRG for those selected HACs that are coded as 'W' for the POA Indicator.", 369 | "1": "Unreported/not used -- exempt from POA reporting -- This code is equivalent to a blank on the UB-04, however, it was determined that blanks are undesirable when submitting this data via the 4010A. CMS will not pay the CC/MCC DRG for those selected HACs that are coded as '1' for the POA Indicator. The '1' POA Indicator should not be applied to any codes on the HAC list.", 370 | "Z": "Denotes the end of the POA indicators (terminated 1/2011).", 371 | "X": "Denotes the end of the POA indicators in special data processing situations that may be identified by CMS in the future (terminated 1/2011).", 372 | "": "Identifies diagnosis codes that are exempt from the POA reporting requirements (replaces the '1'). NOTE: NCH/NMUD will carry a '0' in place of a blank." 
373 | } 374 | }, 375 | "prcdrsw": { 376 | "name": "MEDPAR Surgical Procedure Indicator Switch", 377 | "values": { 378 | "0": "No surgery indicated", 379 | "1": "Yes surgery indicated" 380 | } 381 | }, 382 | "dstntncd": { 383 | "name": "MEDPAR Discharge Destination Code", 384 | "values": { 385 | "01": "Discharged to home/self care (routine charge).", 386 | "02": "Discharged/transferred to other short term general hospital for inpatient care.", 387 | "03": "Discharged/transferred to skilled nursing facility (SNF)", 388 | "04": "Discharged/transferred to intermediate care facility (ICF).", 389 | "05": "Discharged/transferred to another type of institution for inpatient care", 390 | "06": "Discharged/transferred to home care of organized home health service organization.", 391 | "07": "Left against medical advice or discontinued care.", 392 | "08": "Discharged/transferred to home under care of a home IV drug therapy provider.", 393 | "09": "Admitted as an inpatient to this hospital (effective 3/1/91). In situations where a patient is admitted before midnight of the third day following the day of an outpatient service, the outpatient services are considered inpatient.", 394 | "20": "Expired (did not recover - Christian Science patient).", 395 | "30": "Still patient.", 396 | "40": "Expired at home (hospice claims only)", 397 | "41": "Expired in a medical facility such as hospital, SNF, ICF, or freestanding hospice. 
(Hospice claims only)", 398 | "42": "Expired - place unknown (Hospice claims only)", 399 | "43": "Discharged/transferred to a federal hospital", 400 | "50": "Hospice - home", 401 | "51": "Hospice - medical facility", 402 | "61": "Discharged/transferred within this institution to a hospital-based swing bed", 403 | "62": "Discharged/transferred to an inpatient rehabilitation facility.", 404 | "63": "Discharged/transferred to a long term care hospitals.", 405 | "64": "Discharged/transferred to a nursing facility certified under Medicaid only", 406 | "65": "Discharged/Transferred to a psychiatric hospital or psychiatric distinct unit of a hospital.", 407 | "66": "Discharged/transferred to a Critical Access Hospital (CAH)", 408 | "70": "Other", 409 | "71": "Discharged/transferred/referred to another institution for outpatient services as specified by the discharge plan of care", 410 | "72": "Discharged/transferred/referred to this institution for outpatient services as specified by the discharge plan of care" 411 | } 412 | }, 413 | "outlr_cd": { 414 | "name": "MEDPAR DRG/Outlier Stay Code", 415 | "values": { 416 | "0": "No Outlier", 417 | "1": "Day Outlier", 418 | "2": "Cost Outlier", 419 | "6": "Valid DRG Received From Intermediary", 420 | "7": "HCFA-Developed DRG", 421 | "8": "HCFA-Developed DRG Using Claim Status Code", 422 | "9": "Not Groupable" 423 | } 424 | }, 425 | "prpay_cd": { 426 | "name": "MEDPAR Beneficiary Primary Payer Code", 427 | "values": { 428 | "A": "Working aged bene/spouse with eghp", 429 | "B": "ESRD bene in 18-month coordination period with eghp", 430 | "C": "Conditional Medicare payment; future reimbursement expected", 431 | "D": "Auto no-fault or any liability insurance", 432 | "E": "Worker's compensation", 433 | "F": "Phs or other federal agency (other than dept of veterans affairs)", 434 | "G": "Working disabled", 435 | "H": "Black lung", 436 | "I": "Dept of veterans affairs", 437 | "J": "Any liability insurance", 438 | "Z": "Medicare is 
primary payer", 439 | "": "Medicare is primary payer" 440 | } 441 | }, 442 | "src_adms": { 443 | "name": "MEDPAR Source Inpatient Admission Code", 444 | "values": { 445 | "0": "ANOMALY: invalid value, if present, translate to '9'", 446 | "1": "Non-Health Care Facility Point of Origin (Physician Referral) - The patient was admitted to this facility upon an order of a physician.", 447 | "2": "Clinical referral - The patient was admitted upon the recommendation of this facility's clinic physician.", 448 | "3": "HMO referral - Reserved for national assignment. (eff. 3/08) Prior to 3/08, HMO referral - The patient was admitted upon the recommendation of a health maintenance organization (HMO) physician.", 449 | "4": "Transfer from hospital (Different Facility) - The patient was admitted to this facility as a hospital transfer from an acute care facility where he or she was an inpatient.", 450 | "5": "Transfer from a skilled nursing facility (SNF) or Intermediate Care Facility (ICF) - The patient was admitted to this facility as a transfer from a SNF or ICF where he or she was a resident.", 451 | "6": "Transfer from another health care facility - The patient was admitted to this facility as a transfer from another type of health care facility not defined elsewhere in this code list where he or she was an inpatient.", 452 | "7": "Emergency room - The patient was admitted to this facility after receiving services in this facility's emergency room department. (Obsolete - eff. 7/1/10)", 453 | "8": "Court/law enforcement - The patient was admitted upon the direction of a court of law or upon the request of a law enforcement agency's representative. Includes transfers from incarceration facilities.", 454 | "9": "Information not available - The means by which the patient was admitted is not known.", 455 | "A": "Reserved for National Assignment. (eff. 
3/08) Prior to 3/08 defined as: Transfer from a Critical Access Hospital - patient was admitted/referred to this facility as a transfer from a Critical Access Hospital.", 456 | "B": "Transfer from Another Home Health Agency - The patient was admitted to this home health agency as a transfer from another home health agency. (Discontinued July 1, 2010 - See Condition Code 47)", 457 | "C": "Readmission to Same Home Health Agency - The patient was readmitted to this home health agency within the same home health episode period. (Discontinued July 1, 2010)", 458 | "D": "Transfer from hospital inpatient in the same facility resulting in a separate claim to the payer - The patient was admitted to this facility as a transfer from hospital inpatient within this facility resulting in a separate claim to the payer.", 459 | "E": "Transfer from Ambulatory Surgery Center - The patient was admitted to this facility as a transfer from an ambulatory surgery center. (eff. 10/1/2007)", 460 | "F": "Transfer from Hospice and is under a Hospice Plan of Care or Enrolled in a Hospice Program - The patient was admitted to this facility as a transfer from a hospice. (eff. 10/1/2007)" 461 | } 462 | }, 463 | "type_adm": { 464 | "name": "MEDPAR Inpatient Admission Type Code", 465 | "values": { 466 | "0": "Blank", 467 | "1": "Emergency - The patient required immediate medical intervention as a result of severe, life threatening, or potentially disabling conditions. Generally, the patient was admitted through the emergency room.", 468 | "2": "Urgent - The patient required immediate attention for the care and treatment of a physical or mental disorder. 
Generally, the patient was admitted to the first available and suitable accommodation.", 469 | "3": "Elective - The patient's condition permitted adequate time to schedule the availability of suitable accommodations.", 470 | "4": "Newborn - Necessitates the use of special source of admission codes.", 471 | "5": "Trauma Center - visits to a trauma center/hospital as licensed or designated by the State or local government authority authorized to do so, or as verified by the American College of Surgeons and involving a trauma activation.", 472 | "9": "Unknown - Information not available." 473 | } 474 | }, 475 | "wrngcd": { 476 | "name": "MEDPAR Warning Indicators Code", 477 | "values": {} 478 | } 479 | } -------------------------------------------------------------------------------- /medicare_utils/parquet.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | import re 3 | import math 4 | import json 5 | import inspect 6 | import pkg_resources 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from time import time 11 | from joblib import Parallel, delayed 12 | from typing import Any, Dict, List, Optional, Union 13 | from pathlib import Path 14 | from pkg_resources import resource_filename 15 | from pandas.api.types import CategoricalDtype 16 | 17 | from .utils import fpath, _mywrap 18 | pkg_resources.require("pandas>=0.21.0") 19 | 20 | 21 | def convert_med( 22 | pcts: Union[str, List[str]] = ['0001', '01', '05', '100'], 23 | years: Union[int, List[int]] = range(2001, 2013), 24 | data_types: Union[str, List[str]] = ['carc', 'opc', 'bsfab', 'med'], 25 | rg_size: float = 2.5, 26 | parquet_engine: str = 'pyarrow', 27 | compression_type: str = 'SNAPPY', 28 | manual_schema: bool = False, 29 | ehic_xw: bool = True, 30 | n_jobs: int = 6, 31 | med_dta: str = '/disk/aging/medicare/data', 32 | med_pq: 33 | str = '/disk/agebulk3/medicare.work/doyle-DUA51929/barronk-DUA51929/raw/pq' 34 | ) -> None: # yapf: 
disable 35 | """Convert Medicare Stata files to parquet 36 | 37 | Args: 38 | pcts: percent samples to convert 39 | years: file years to convert 40 | data_types: 41 | types of data files to convert 42 | 43 | - ``bsfab`` (`Beneficiary Summary File, Base segment`_) 44 | - ``bsfcc`` (`Beneficiary Summary File, Chronic Conditions segment`_) 45 | - ``bsfcu`` (`Beneficiary Summary File, Cost & Use segment`_) 46 | - ``bsfd`` (`Beneficiary Summary File, National Death Index segment`_) 47 | - ``carc`` (`Carrier File, Claims segment`_) 48 | - ``carl`` (`Carrier File, Line segment`_) 49 | - ``den`` (Denominator File) 50 | - ``dmec`` (`Durable Medical Equipment File, Claims segment`_) 51 | - ``dmel`` (`Durable Medical Equipment File, Line segment`_) 52 | - ``hhac`` (`Home Health Agency File, Claims segment`_) 53 | - ``hhar`` (`Home Health Agency File, Revenue Center segment`_) 54 | - ``hosc`` (`Hospice File, Claims segment`_) 55 | - ``hosr`` (`Hospice File, Revenue Center segment`_) 56 | - ``ipc`` (`Inpatient File, Claims segment`_) 57 | - ``ipr`` (`Inpatient File, Revenue Center segment`_) 58 | - ``med`` (`MedPAR File`_) 59 | - ``opc`` (`Outpatient File, Claims segment`_) 60 | - ``opr`` (`Outpatient File, Revenue Center segment`_) 61 | - ``snfc`` (`Skilled Nursing Facility File, Claims segment`_) 62 | - ``snfr`` (`Skilled Nursing Facility File, Revenue Center segment`_) 63 | - ``xw`` (Crosswalks files for ``ehic`` - ``bene_id``) 64 | 65 | .. _`Beneficiary Summary File, Base segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#base-abcd-segment_2 66 | .. _`Beneficiary Summary File, Chronic Conditions segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#chronic-conditions-segment_2 67 | .. _`Beneficiary Summary File, Cost & Use segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#cost-and-use-segment_1 68 | .. 
_`Beneficiary Summary File, National Death Index segment`: https://kylebarron.github.io/medicare-documentation/resdac/mbsf/#national-death-index-segment_1 69 | .. _`Carrier File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/carrier-rif/#carrier-rif_1 70 | .. _`Carrier File, Line segment`: https://kylebarron.github.io/medicare-documentation/resdac/carrier-rif/#line-file 71 | .. _`Durable Medical Equipment File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/dme-rif/#durable-medical-equipment-rif_1 72 | .. _`Durable Medical Equipment File, Line segment`: https://kylebarron.github.io/medicare-documentation/resdac/dme-rif/#line-file 73 | .. _`Home Health Agency File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/hha-rif/#home-health-agency-rif_1 74 | .. _`Home Health Agency File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/hha-rif/#revenue-center-file 75 | .. _`Hospice File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/hospice-rif/#hospice-rif_1 76 | .. _`Hospice File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/hospice-rif/#revenue-center-file 77 | .. _`Inpatient File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/ip-rif/#inpatient-rif_1 78 | .. _`Inpatient File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/ip-rif/#revenue-center-file 79 | .. _`MedPAR File`: https://kylebarron.github.io/medicare-documentation/resdac/medpar-rif/#medpar-rif_1 80 | .. _`Outpatient File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/op-rif/#outpatient-rif_1 81 | .. _`Outpatient File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/op-rif/#revenue-center-file 82 | .. 
_`Skilled Nursing Facility File, Claims segment`: https://kylebarron.github.io/medicare-documentation/resdac/snf-rif/#skilled-nursing-facility-rif_1 83 | .. _`Skilled Nursing Facility File, Revenue Center segment`: https://kylebarron.github.io/medicare-documentation/resdac/snf-rif/#revenue-center-file 84 | 85 | rg_size: size in GB of each Parquet row group 86 | parquet_engine: either 'fastparquet' or 'pyarrow' 87 | compression_type: 'SNAPPY' or 'GZIP' 88 | manual_schema: whether to create manual parquet schema. Doesn't 89 | always work. 90 | ehic_xw: Merge bene_id onto old files with ehic 91 | n_jobs: number of processes to use 92 | med_dta: top of tree for medicare stata files 93 | med_pq: top of tree to output new parquet files 94 | """ 95 | 96 | if type(pcts) is str: 97 | pcts = [pcts] 98 | elif type(pcts) is list: 99 | pass 100 | else: 101 | raise TypeError('pcts must be string or list of strings') 102 | 103 | if type(years) is int: 104 | years = [years] 105 | elif type(years) is list: 106 | pass 107 | elif type(years) is range: 108 | pass 109 | else: 110 | raise TypeError('years must be int, range, or list of ints') 111 | 112 | if type(data_types) is str: 113 | data_types = [data_types] 114 | elif type(data_types) is list: 115 | pass 116 | else: 117 | raise TypeError('data_types must be string or list of strings') 118 | 119 | data_list = [[x, y, z] for x in pcts for y in years for z in data_types] 120 | 121 | # Drop 100% carrier: 122 | # data_list = [ 123 | # x for x in data_list if not (x[2] == 'carc') & (x[0] == '100')] 124 | 125 | # Or: 126 | # Replace 100% carrier with 20% carrier: 127 | data_list = [['20', x[1], x[2]] 128 | if ((x[2] == 'carc') & (x[0] == '100')) else x 129 | for x in data_list] 130 | 131 | # Make sure list is unique: 132 | data_list = sorted([list(x) for x in set(tuple(y) for y in data_list)]) 133 | 134 | Parallel(n_jobs=n_jobs)( 135 | delayed(_convert_med)( 136 | *i, 137 | rg_size=rg_size, 138 | parquet_engine=parquet_engine, 139 | 
compression_type=compression_type, 140 | manual_schema=manual_schema, 141 | ehic_xw=ehic_xw, 142 | med_dta=med_dta, 143 | med_pq=med_pq) for i in data_list) 144 | 145 | 146 | def _convert_med( 147 | pct: str, 148 | year: int, 149 | data_type: Union[str, List[str]], 150 | rg_size: float = 2.5, 151 | parquet_engine: str = 'pyarrow', 152 | compression_type: str = 'SNAPPY', 153 | manual_schema: bool = False, 154 | ehic_xw: bool = True, 155 | med_dta: str = '/disk/aging/medicare/data', 156 | med_pq: 157 | str = '/disk/agebulk3/medicare.work/doyle-DUA51929/barronk-DUA51929/raw/pq' 158 | ) -> None: # yapf: disable 159 | """Convert a single Medicare file to parquet format. 160 | 161 | Args: 162 | pct: percent sample to convert 163 | year: year of data to convert 164 | data_type: 165 | type of data files to convert 166 | 167 | - ``bsfab`` Beneficiary Summary File, Base segment 168 | - ``bsfcc`` Beneficiary Summary File, Chronic Conditions segment 169 | - ``bsfcu`` Beneficiary Summary File, Cost & Use segment 170 | - ``bsfd`` Beneficiary Summary File, National Death Index segment 171 | - ``carc`` Carrier File, Claims segment 172 | - ``carl`` Carrier File, Line segment 173 | - ``den`` Denominator File 174 | - ``dmec`` Durable Medical Equipment File, Claims segment 175 | - ``dmel`` Durable Medical Equipment File, Line segment 176 | - ``hhac`` Home Health Agency File, Claims segment 177 | - ``hhar`` Home Health Agency File, Revenue Center segment 178 | - ``hosc`` Hospice File, Claims segment 179 | - ``hosr`` Hospice File, Revenue Center segment 180 | - ``ipc`` Inpatient File, Claims segment 181 | - ``ipr`` Inpatient File, Revenue Center segment 182 | - ``med`` MedPAR File 183 | - ``opc`` Outpatient File, Claims segment 184 | - ``opr`` Outpatient File, Revenue Center segment 185 | - ``snfc`` Skilled Nursing Facility File, Claims segment 186 | - ``snfr`` Skilled Nursing Facility File, Revenue Center segment 187 | - ``xw`` Crosswalks files for ``ehic`` - ``bene_id`` 188 | rg_size: 
size in GB of each Parquet row group 189 | parquet_engine: either 'fastparquet' or 'pyarrow' 190 | compression_type: 'SNAPPY' or 'GZIP' 191 | manual_schema: whether to create manual parquet schema. Doesn't 192 | always work. 193 | med_dta: canonical path for raw medicare dta files 194 | med_pq: top of tree to output new parquet files 195 | ehic_xw: Merge bene_id onto old files with ehic 196 | Returns: 197 | nothing. Writes parquet file to disk. 198 | Raises: 199 | NameError if data_type is not one of the above 200 | """ 201 | 202 | if type(pct) != str: 203 | raise TypeError('pct must be str') 204 | if type(year) != int: 205 | raise TypeError('year must be int') 206 | 207 | infile = fpath(percent=pct, year=year, data_type=data_type, dta=True) 208 | outfile = fpath( 209 | percent=pct, year=year, data_type=data_type, dta=False, pq_path=med_pq) 210 | 211 | if not data_type.startswith('bsf'): 212 | # TODO Refactor this into separate function. 213 | path = resource_filename( 214 | 'medicare_utils', f'metadata/xw/{data_type}.json') 215 | try: 216 | with open(path) as f: 217 | varnames = json.load(f) 218 | except OSError: 219 | varnames = {} 220 | 221 | rename_dict = {} 222 | for varname, names in varnames.items(): 223 | n = {k: v for k, v in names.items() if k == str(year)} 224 | if n: 225 | rename_dict[n[str(year)]['name']] = varname 226 | 227 | if rename_dict: 228 | # Remove items from dict that map to duplicate values 229 | # Can't save a parquet file where multiple cols have same name 230 | rev_rename_dict = {} 231 | for key, value in rename_dict.items(): 232 | rev_rename_dict.setdefault(value, set()).add(key) 233 | dups = [key for key, val in rev_rename_dict.items() if len(val) > 1] 234 | 235 | for k, v in rename_dict.copy().items(): 236 | if v in dups: 237 | rename_dict.pop(k) 238 | else: 239 | print(f'Year not in variable dictionary: {year}') 240 | rename_dict = None 241 | else: 242 | rename_dict = None 243 | 244 | # Make folder path if it doesn't exist 245 | 
folder = Path(outfile).parents[0] 246 | folder.mkdir(exist_ok=True, parents=True) 247 | 248 | msg = f"""\ 249 | Starting {data_type} conversion 250 | - Percent: {pct} 251 | - Year {year} 252 | """ 253 | print(_mywrap(msg)) 254 | 255 | if ehic_xw and (year <= 2005) and not (data_type.startswith('bsf')): 256 | ehic_xw = fpath(pct, year, 'xw_bsf', pq_path=med_pq) 257 | if not Path(ehic_xw).is_file(): 258 | ehic_xw = fpath(pct, year, 'xw_bsf', dta=True, dta_path=med_dta) 259 | else: 260 | ehic_xw = None 261 | 262 | try: 263 | convert_file( 264 | infile=infile, 265 | outfile=outfile, 266 | rename_dict=rename_dict, 267 | rg_size=rg_size, 268 | parquet_engine=parquet_engine, 269 | compression_type=compression_type, 270 | manual_schema=manual_schema, 271 | ehic_xw=ehic_xw) 272 | except: 273 | pass 274 | 275 | 276 | def convert_file( 277 | infile: str, 278 | outfile: str, 279 | rename_dict: Dict[str, str] = None, 280 | rg_size: float = 2.5, 281 | parquet_engine: str = 'pyarrow', 282 | compression_type: str = 'SNAPPY', 283 | manual_schema: bool = False, 284 | ehic_xw: Optional[str] = None) -> None: 285 | """Convert arbitrary Stata file to Parquet format 286 | 287 | Args: 288 | infile: path of file to read from 289 | outfile: path of file to export to 290 | rename_dict: keys should be initial variable names; values should 291 | be new variable names 292 | rg_size: Size in GB of the individual row groups 293 | parquet_engine: either ``pyarrow`` or ``fastparquet`` 294 | compression_type: Compression algorithm to use. Can be ``SNAPPY`` or 295 | ``GZIP``. 296 | manual_schema: Create parquet schema manually. For use with 297 | pyarrow; doesn't always work 298 | ehic_xw: Merge bene_id onto old files with ehic 299 | Returns: 300 | Writes .parquet file to disk. 
301 | """ 302 | if parquet_engine == 'pyarrow': 303 | import pyarrow as pa 304 | import pyarrow.parquet as pq 305 | elif parquet_engine == 'fastparquet': 306 | import fastparquet as fp 307 | 308 | t0 = time() 309 | 310 | infile = Path(infile) 311 | # File name without suffix 312 | infile_stub = infile.stem 313 | # Extension 314 | infile_type = infile.suffix[1:] 315 | 316 | # Set row group size. The following makes an even multiple of row groups 317 | # as close as possible to the given `rg_size` 318 | file_size = infile.stat().st_size / (1024 ** 3) 319 | n_rg = round(file_size / rg_size) 320 | if n_rg == 0: 321 | n_rg += 1 322 | 323 | nrow_total = pd.read_stata(infile, iterator=True).nobs 324 | nrow_rg = math.ceil(nrow_total / n_rg) 325 | gb_per_rg = file_size / n_rg 326 | 327 | msg = f"""\ 328 | Row groups: 329 | - {n_rg} of size {gb_per_rg:.2f} GB 330 | Beginning scanning dtypes of file: 331 | - infile: {infile_stub}.{infile_type} 332 | - time: {(time() - t0) / 60:.2f} minutes 333 | """ 334 | print(_mywrap(msg)) 335 | 336 | if parquet_engine == 'pyarrow': 337 | dtypes = _scan_file(infile, categorical=False) 338 | elif parquet_engine == 'fastparquet': 339 | dtypes = _scan_file(infile, categorical=True) 340 | 341 | if rename_dict is not None: 342 | for old_name, new_name in rename_dict.items(): 343 | try: 344 | dtypes[new_name] = dtypes.pop(old_name) 345 | except KeyError: 346 | pass 347 | 348 | msg = f"""\ 349 | Finished scanning dtypes of file 350 | - infile: {infile_stub}.{infile_type} 351 | - time: {(time() - t0) / 60:.2f} minutes 352 | """ 353 | print(_mywrap(msg)) 354 | 355 | if ehic_xw: 356 | ehic_xw = Path(ehic_xw) 357 | if ehic_xw.suffix == '.parquet': 358 | xw = pd.read_parquet(ehic_xw, columns=['ehic', 'bene_id']) 359 | elif ehic_xw.suffix == '.dta': 360 | xw = pd.read_stata(ehic_xw, columns=['ehic', 'bene_id']) 361 | xw = xw.set_index('ehic') 362 | 363 | itr = pd.read_stata(infile, chunksize=nrow_rg) 364 | i = 0 365 | for df in itr: 366 | i += 1 367 | 
msg = f"""\ 368 | Read from file: 369 | - Group {i} 370 | - infile: {infile_stub}.{infile_type} 371 | - time: {(time() - t0) / 60:.2f} minutes 372 | """ 373 | print(_mywrap(msg)) 374 | 375 | if rename_dict is not None: 376 | df = df.rename(columns=rename_dict) 377 | 378 | # Rename columns that aren't in XW with `x_` prefix 379 | non_xw_cols = set(df.columns).difference(rename_dict.values()) 380 | df = df.rename(columns={x: 'x_' + x for x in non_xw_cols}) 381 | for col in non_xw_cols: 382 | try: 383 | dtypes['x_' + col] = dtypes.pop(col) 384 | except KeyError: 385 | pass 386 | 387 | df = df.astype(dtypes) 388 | 389 | if ehic_xw: 390 | df = df.merge(xw, how='left', left_on='ehic', right_index=True) 391 | 392 | msg = f"""\ 393 | Cleaned file: 394 | - Group {i} 395 | - infile: {infile_stub}.{infile_type} 396 | - time: {(time() - t0) / 60:.2f} minutes 397 | """ 398 | print(_mywrap(msg)) 399 | 400 | if parquet_engine == 'pyarrow': 401 | if i == 1: 402 | if manual_schema: 403 | schema = _create_parquet_schema(df.dtypes) 404 | else: 405 | schema = pa.Table.from_pandas( 406 | df, preserve_index=False).schema 407 | writer = pq.ParquetWriter(outfile, schema, flavor='spark') 408 | 409 | writer.write_table(pa.Table.from_pandas(df, preserve_index=False)) 410 | elif parquet_engine == 'fastparquet': 411 | if i == 1: 412 | fp.write( 413 | outfile, 414 | df, 415 | compression=compression_type, 416 | has_nulls=False, 417 | write_index=False, 418 | object_encoding='utf8') 419 | else: 420 | fp.write( 421 | outfile, 422 | df, 423 | compression=compression_type, 424 | has_nulls=False, 425 | write_index=False, 426 | object_encoding='utf8', 427 | append=True) 428 | 429 | msg = f"""\ 430 | Wrote to parquet: 431 | - Group {i} 432 | - infile: {infile_stub}.{infile_type} 433 | - time: {(time() - t0) / 60:.2f} minutes 434 | """ 435 | print(_mywrap(msg)) 436 | 437 | if parquet_engine == 'pyarrow': 438 | writer.close() 439 | 440 | print('Wrote to .parquet:\n\tAll groups') 441 | 442 | 443 | def 
_convert_dates(df, datecols): 444 | for col in datecols: 445 | if not pd.core.dtypes.common.is_datetimelike(df.iloc[:, col]): 446 | if df[col].dtype == np.number: 447 | df.iloc[:, col] = pd.to_datetime( 448 | df.iloc[:, col], 449 | unit='D', 450 | origin=pd.Timestamp('1960-01-01'), 451 | errors='coerce') 452 | elif df[col].dtype == 'object': 453 | df.loc[:, 'from_dt'] = pd.to_datetime( 454 | df.loc[:, 'from_dt'], format='%Y-%m-%d', errors='coerce') 455 | return df 456 | 457 | 458 | def _scan_file( 459 | infile: Union[str, Path], 460 | categorical: bool = True, 461 | chunksize: int = 100000, 462 | cat_threshold: float = 0.1, 463 | unsigned: bool = False) -> Dict[str, Any]: 464 | """Scan dta file to find minimal dtypes to hold data in 465 | 466 | For each of the chunks of df: 467 | for string columns: hold all unique values if I want them categorical 468 | for float columns: do nothing 469 | for integer columns: search for missings, highest and lowest value 470 | for date columns: nothing 471 | 472 | Args: 473 | infile: dta file to scan 474 | categorical: whether to change strings to categorical 475 | chunksize: number of rows of infile to read at a time 476 | cat_threshold: maximum fraction of unique values in order 477 | to convert to categorical 478 | 479 | Returns: 480 | dictionary with variable names and dtyplist 481 | """ 482 | itr = pd.read_stata(infile, iterator=True) 483 | varlist_df = pd.DataFrame({ 484 | 'format': itr.fmtlist, 485 | 'name': itr.varlist, 486 | 'col_size': itr.col_sizes, 487 | 'dtype': itr.dtyplist, 488 | 'label': list(itr.variable_labels().values())}) 489 | 490 | start_cols = {} 491 | 492 | date_fmts = ('%tc', '%tC', '%td', '%d', '%tw', '%tm', '%tq', '%th', '%ty') 493 | date_cols = varlist_df['format'].apply(lambda x: x.startswith(date_fmts)) 494 | date_cols = varlist_df[date_cols]['name'].values.tolist() 495 | start_cols['date_cols'] = date_cols 496 | 497 | int_cols = varlist_df['dtype'].apply( 498 | lambda x: np.issubdtype(x, np.integer) 
if inspect.isclass(x) else False) 499 | int_cols = varlist_df[int_cols]['name'].values.tolist() 500 | int_cols = sorted(list(set(int_cols) - set(date_cols))) 501 | start_cols['int_cols'] = int_cols 502 | 503 | regex = r'%.+s' 504 | str_cols = varlist_df['format'].apply(lambda x: bool(re.search(regex, x))) 505 | str_cols = varlist_df[str_cols]['name'].values.tolist() 506 | start_cols['str_cols'] = str_cols 507 | 508 | float_cols = varlist_df['dtype'].apply( 509 | lambda x: np.issubdtype(x, np.floating) if inspect.isclass(x) else False 510 | ) 511 | float_cols = varlist_df[float_cols]['name'].values.tolist() 512 | start_cols['float_cols'] = float_cols 513 | 514 | end_cols = { 515 | 'date_cols': start_cols['date_cols'], 516 | 'int_cols': { 517 | 'names': start_cols['int_cols'], 518 | 'min': {key: None 519 | for key in start_cols['int_cols']}, 520 | 'max': {key: None 521 | for key in start_cols['int_cols']}}, 522 | 'float_cols': start_cols['float_cols']} 523 | if categorical: 524 | end_cols['cat_cols'] = { 525 | 'names': start_cols['str_cols'], 526 | 'cats': {key: set() 527 | for key in start_cols['str_cols']}} 528 | end_cols['str_cols'] = [] 529 | else: 530 | end_cols['cat_cols'] = {} 531 | end_cols['str_cols'] = start_cols['str_cols'] 532 | 533 | tokeep = [] 534 | tokeep.extend(start_cols['int_cols']) 535 | if categorical: 536 | tokeep.extend(start_cols['str_cols']) 537 | itr = pd.read_stata(infile, columns=tokeep, chunksize=chunksize) 538 | 539 | i = 0 540 | for df in itr: 541 | i += 1 542 | print(f'Scanning group {i} of data') 543 | # Integer vars: 544 | int_cols = end_cols['int_cols']['names'].copy() 545 | for col in int_cols: 546 | # Check missings 547 | if df.loc[:, col].isnull().values.any(): 548 | # If missings, convert to float 549 | end_cols['float_cols'].append(col) 550 | end_cols['int_cols']['names'].remove(col) 551 | end_cols['int_cols']['max'].pop(col) 552 | end_cols['int_cols']['min'].pop(col) 553 | else: 554 | # Check minimum 555 | minval = 
min(df.loc[:, col]) 556 | if end_cols['int_cols']['min'][col] is None: 557 | end_cols['int_cols']['min'][col] = minval 558 | elif minval < end_cols['int_cols']['min'][col]: 559 | end_cols['int_cols']['min'][col] = minval 560 | 561 | # Check maximum 562 | maxval = max(df.loc[:, col]) 563 | if end_cols['int_cols']['max'][col] is None: 564 | end_cols['int_cols']['max'][col] = maxval 565 | elif maxval > end_cols['int_cols']['max'][col]: 566 | end_cols['int_cols']['max'][col] = maxval 567 | 568 | if categorical: 569 | # Scan str vars for categories 570 | cat_cols = end_cols['cat_cols']['names'].copy() 571 | for col in cat_cols: 572 | num_unique_values = len(df[col].unique()) 573 | num_total_values = len(df[col]) 574 | 575 | if num_unique_values / num_total_values < cat_threshold: 576 | # Then stays as category 577 | # Add category values 578 | unique_vals = df[col].unique().tolist() 579 | end_cols['cat_cols']['cats'][col].update(unique_vals) 580 | else: 581 | print(f'{col} is now a string') 582 | # Becomes regular string column 583 | end_cols['str_cols'].append(col) 584 | end_cols['cat_cols']['cats'].pop(col) 585 | end_cols['cat_cols']['names'].remove(col) 586 | 587 | # Not currently scanning date or float vars 588 | 589 | dtypes_dict = {} 590 | 591 | # Int dtypes: 592 | for col in end_cols['int_cols']['names']: 593 | if unsigned and (end_cols['int_cols']['min'][col] >= 0): 594 | if end_cols['int_cols']['max'][col] <= np.iinfo(np.uint8).max: 595 | dtypes_dict[col] = np.uint8 596 | elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint16).max: 597 | dtypes_dict[col] = np.uint16 598 | elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint32).max: 599 | dtypes_dict[col] = np.uint32 600 | elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint64).max: 601 | dtypes_dict[col] = np.uint64 602 | else: 603 | if False: 604 | pass 605 | elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int8).max) & 606 | (end_cols['int_cols']['min'][col] >= np.iinfo(np.int8).min)): 
607 | dtypes_dict[col] = np.int8 608 | elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int16).max) & 609 | (end_cols['int_cols']['min'][col] >= np.iinfo(np.int16).min)): 610 | dtypes_dict[col] = np.int16 611 | elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int32).max) & 612 | (end_cols['int_cols']['min'][col] >= np.iinfo(np.int32).min)): 613 | dtypes_dict[col] = np.int32 614 | elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int64).max) & 615 | (end_cols['int_cols']['min'][col] >= np.iinfo(np.int64).min)): 616 | dtypes_dict[col] = np.int64 617 | 618 | for col in end_cols['float_cols']: 619 | dtypes_dict[col] = np.float64 620 | 621 | if categorical: 622 | for col in end_cols['cat_cols']['names']: 623 | dtypes_dict[col] = CategoricalDtype( 624 | end_cols['cat_cols']['cats'][col]) 625 | 626 | return dtypes_dict 627 | 628 | 629 | def _create_parquet_schema(dtypes): 630 | """Create parquet schema from Pandas dtypes 631 | 632 | Args: 633 | dtypes: A dict or Series of dtypes 634 | Returns: 635 | pyarrow.Schema 636 | """ 637 | import pyarrow as pa 638 | 639 | dtypes = dict(dtypes) 640 | fields = [] 641 | for varname, vartype in dtypes.items(): 642 | if vartype == np.float16: 643 | fields.append(pa.field(varname, pa.float16())) 644 | elif vartype == np.float32: 645 | fields.append(pa.field(varname, pa.float32())) 646 | elif vartype == np.float64: 647 | fields.append(pa.field(varname, pa.float64())) 648 | elif vartype == np.int8: 649 | fields.append(pa.field(varname, pa.int8())) 650 | elif vartype == np.int16: 651 | fields.append(pa.field(varname, pa.int16())) 652 | elif vartype == np.int32: 653 | fields.append(pa.field(varname, pa.int32())) 654 | elif vartype == np.int64: 655 | fields.append(pa.field(varname, pa.int64())) 656 | elif vartype == np.uint8: 657 | fields.append(pa.field(varname, pa.uint8())) 658 | elif vartype == np.uint16: 659 | fields.append(pa.field(varname, pa.uint16())) 660 | elif vartype == np.uint32: 661 | 
fields.append(pa.field(varname, pa.uint32())) 662 | elif vartype == np.uint64: 663 | fields.append(pa.field(varname, pa.uint64())) 664 | elif vartype == np.bool_: 665 | fields.append(pa.field(varname, pa.bool_())) 666 | elif (vartype == object) | (vartype.name == 'category'): 667 | fields.append(pa.field(varname, pa.string())) 668 | elif np.issubdtype(vartype, np.datetime64): 669 | fields.append(pa.field(varname, pa.timestamp('ns'))) 670 | 671 | assert len(dtypes) == len(fields) 672 | schema = pa.schema(fields) 673 | return schema 674 | 675 | 676 | if __name__ == '__main__': 677 | convert_med() 678 | -------------------------------------------------------------------------------- /tests/test_medicare_df.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pytest 3 | import pandas as pd 4 | import medicare_utils as med 5 | 6 | 7 | class TestInit(object): 8 | # All the non-default arguments 9 | @pytest.fixture 10 | def init(self): 11 | return {'percent': '01', 'years': 2012} 12 | 13 | @pytest.mark.parametrize( 14 | 'pct,pct_act', 15 | [('0001', '0001'), 16 | ('01', '01'), 17 | ('05', '05'), 18 | ('20', '20'), 19 | ('100', '100'), 20 | (0.01, '0001'), 21 | (1, '01'), 22 | (5, '05'), 23 | (20, '20'), 24 | (100, '100')]) # yapf: disable 25 | def test_percents(self, init, pct, pct_act): 26 | init['percent'] = pct 27 | mdf = med.MedicareDF(**init) 28 | assert mdf.percent == pct_act 29 | 30 | @pytest.mark.parametrize('pct', ['02', '45', 2, 56]) 31 | def test_invalid_percents(self, init, pct): 32 | init['percent'] = pct 33 | with pytest.raises(ValueError): 34 | med.MedicareDF(**init) 35 | 36 | @pytest.mark.parametrize( 37 | 'years,years_act', 38 | [(2005, [2005]), 39 | (range(2010, 2013), range(2010, 2013)), 40 | ([2010, 2011, 2012], [2010, 2011, 2012])]) # yapf: disable 41 | def test_years(self, init, years, years_act): 42 | init['years'] = years 43 | mdf = med.MedicareDF(**init) 44 | assert mdf.years == years_act 45 
    @pytest.mark.parametrize('years', ['2012', 2012.0])
    def test_invalid_years(self, init, years):
        init['years'] = years
        with pytest.raises(TypeError):
            med.MedicareDF(**init)

    @pytest.mark.parametrize('year_type', ['calendar', 'age'])
    def test_year_type(self, year_type):
        mdf = med.MedicareDF('01', [2011, 2012], year_type=year_type)
        # NOTE(review): this comparison is missing an `assert`, so the
        # check is a no-op and the test always passes.
        mdf.year_type == year_type

    @pytest.mark.parametrize(
        'years', [2012, [2012], range(2012, 2013), [2010, 2012]])
    def test_invalid_age_years(self, init, years):
        init['year_type'] = 'age'
        init['years'] = years
        with pytest.raises(ValueError):
            med.MedicareDF(**init)


class TestGetCohortTypeCheck(object):
    """Argument validation tests for MedicareDF._get_cohort_type_check."""

    # All keyword arguments with their default values
    @pytest.fixture
    def init(self):
        return {
            'gender': None,
            'ages': None,
            'races': None,
            'rti_race': False,
            'buyin_val': None,
            'hmo_val': None,
            'join': 'left',
            'keep_vars': None,
            'dask': False,
            'verbose': True}

    @pytest.fixture
    def mdf(self):
        return med.MedicareDF('01', 2012)

    @pytest.mark.parametrize(
        'gender,expected',
        [(None, None),
         ('unknown', '0'),
         ('male', '1'),
         ('female', '2'),
         ('u', '0'),
         ('m', '1'),
         ('f', '2'),
         ('UNKNOWN', '0'),
         ('MALE', '1'),
         ('FEMALE', '2'),
         ('U', '0'),
         ('M', '1'),
         ('F', '2'),
         ('0', '0'),
         ('1', '1'),
         ('2', '2')])  # yapf: disable
    def test_gender(self, mdf, init, gender, expected):
        init['gender'] = gender
        result = mdf._get_cohort_type_check(**init)
        assert result.gender == expected

    @pytest.mark.parametrize(
        'gender,error', [
            (['string_in_list'], TypeError),
            ([1], TypeError),
            (1, TypeError),
            (2, TypeError),
            (0.1, TypeError),
            ('ma', ValueError),
            ('mal', ValueError),
            ('fem', ValueError),
            ('femal', ValueError),
            ('3', ValueError),
            ('-1', ValueError),
            ('unkn', ValueError), ])
    def test_gender_type_error(self, mdf, init, gender, error):
        init['gender'] = gender
        with pytest.raises(error):
            mdf._get_cohort_type_check(**init)

    @pytest.mark.parametrize('ages', ['65', 65.5, ['65'], [65, '66'], True])
    def test_ages_type_error(self, mdf, init, ages):
        init['ages'] = ages
        with pytest.raises(TypeError):
            mdf._get_cohort_type_check(**init)

    @pytest.mark.parametrize('rti_race', ['1', '0', 0, 1])
    def test_rti_race(self, mdf, init, rti_race):
        init['rti_race'] = rti_race
        with pytest.raises(TypeError):
            mdf._get_cohort_type_check(**init)

    @pytest.mark.parametrize(
        'races,expected',
        [(None, None),
         ('unknown', ['0']),
         ('white', ['1']),
         ('black (or african-american)', ['2']),
         ('black', ['2']),
         ('african-american', ['2']),
         ('other', ['3']),
         ('asian pacific islander', ['4']),
         ('asian', ['4']),
         ('hispanic', ['5']),
         ('american indian alaska native', ['6']),
         ('american indian', ['6']),
         ('UNKNOWN', ['0']),
         ('WHITE', ['1']),
         ('BLACK (OR AFRICAN-AMERICAN)', ['2']),
         ('BLACK', ['2']),
         ('AFRICAN-AMERICAN', ['2']),
         ('OTHER', ['3']),
         ('ASIAN PACIFIC ISLANDER', ['4']),
         ('ASIAN', ['4']),
         ('HISPANIC', ['5']),
         ('AMERICAN INDIAN ALASKA NATIVE', ['6']),
         ('AMERICAN INDIAN', ['6']),
         (['white', 'black'], ['1', '2']),
         (['white', 'black', 'asian'], ['1', '2', '4']),
         (['white', 'asian'], ['1', '4']),
         (['black', 'asian'], ['2', '4']),
         (['0', '1', '2'], ['0', '1', '2']),
         ('0', ['0']),
         ('1', ['1']),
         ('2', ['2']),
         ('3', ['3']),
         ('4', ['4']),
         ('5', ['5']),
         ('6', ['6'])])  # yapf: disable
    def test_races_rti_true(self, mdf, init, races, expected):
        init['rti_race'] = True
        init['races'] = races
        result = mdf._get_cohort_type_check(**init)
        assert result.races == expected

    @pytest.mark.parametrize(
        'races,expected',
        [(None, None),
         ('unknown', ['0']),
         ('white', ['1']),
         ('black', ['2']),
         ('other', ['3']),
         ('asian', ['4']),
         ('hispanic', ['5']),
         ('north american native', ['6']),
         ('UNKNOWN', ['0']),
         ('WHITE', ['1']),
         ('BLACK', ['2']),
         ('OTHER', ['3']),
         ('ASIAN', ['4']),
         ('HISPANIC', ['5']),
         ('NORTH AMERICAN NATIVE', ['6']),
         (['white', 'black'], ['1', '2']),
         (['white', 'black', 'asian'], ['1', '2', '4']),
         (['white', 'asian'], ['1', '4']),
         (['black', 'asian'], ['2', '4']),
         (['0', '1', '2'], ['0', '1', '2']),
         ('0', ['0']),
         ('1', ['1']),
         ('2', ['2']),
         ('3', ['3']),
         ('4', ['4']),
         ('5', ['5']),
         ('6', ['6'])
         ])  # yapf: disable
    def test_races_rti_false(self, mdf, init, races, expected):
        init['rti_race'] = False
        init['races'] = races
        result = mdf._get_cohort_type_check(**init)
        assert result.races == expected

    @pytest.mark.parametrize(
        'buyin_val,expected', [('3', ['3']), (['3'], ['3'])])
    def test_buyin_val(self, mdf, init, buyin_val, expected):
        init['buyin_val'] = buyin_val
        result = mdf._get_cohort_type_check(**init)
        assert result.buyin_val == expected

    @pytest.mark.parametrize('hmo_val,expected', [('3', ['3']), (['3'], ['3'])])
    def test_hmo_val(self, mdf, init, hmo_val, expected):
        init['hmo_val'] = hmo_val
        result = mdf._get_cohort_type_check(**init)
        assert result.hmo_val == expected

    @pytest.mark.parametrize(
        'keep_vars,expected',
        [
            ('3', ['3']),
            (['3'], ['3']),
            (['3', re.compile('a')], ['3', re.compile('a')]),
            (re.compile('a'), [re.compile('a')]),
        ])  # yapf: disable
    def test_keep_vars(self, mdf, init, keep_vars, expected):
        init['keep_vars'] = keep_vars
        result = mdf._get_cohort_type_check(**init)
        assert result.keep_vars == expected

    @pytest.mark.parametrize('join,expected',
                             [('left', 'left'),
                              ('right', 'right'),
                              ('inner', 'inner'),
                              ('outer', 'outer')])  # yapf: disable
    def test_allowed_join(self, mdf, init, join, expected):
        init['join'] = join
        result = mdf._get_cohort_type_check(**init)
        assert result.join == expected

    def test_allowed_join_value_error(self, mdf, init):
        init['join'] = 'invalid_string'
        with pytest.raises(ValueError):
            mdf._get_cohort_type_check(**init)

    def test_dask_type_error(self, mdf, init):
        init['dask'] = 'string'
        with pytest.raises(TypeError):
            mdf._get_cohort_type_check(**init)

    def test_verbose_type_error(self, mdf, init):
        init['verbose'] = 'string'
        with pytest.raises(TypeError):
            mdf._get_cohort_type_check(**init)


class TestGetCohortMonthFilter(object):
    """Tests for MedicareDF._get_cohort_month_filter with age year_type."""

    @pytest.fixture
    def mdf(self):
        return med.MedicareDF('01', [2010, 2011, 2012], year_type='age')

    # Synthetic monthly enrollment data: one row per dob_month, one
    # column per calendar month
    @pytest.fixture
    def pl(self):
        # yapf: disable
        data = [
            [1, '2','2','1','1','2','2','1','2','1','2','2','2'],
            [2, '2','2','2','1','2','2','1','1','2','2','2','1'],
            [3, '2','2','2','1','2','2','1','1','1','2','2','2'],
            [4, '1','2','1','1','1','1','2','2','2','1','2','2'],
            [5, '2','2','2','1','1','2','2','1','2','1','2','1'],
            [6, '2','1','1','1','2','1','1','1','1','2','2','2'],
            [7, '2','2','1','1','2','2','1','2','1','2','2','2'],
            [8, '2','2','2','1','2','2','1','1','2','2','2','2'],
            [9, '2','2','1','1','2','2','1','1','2','2','2','2'],
            [10, '1','2','1','1','1','1','2','2','2','2','2','2'],
            [11, '2','2','2','1','1','2','2','1','2','2','2','2'],
            [12, '2','1','1','1','2','1','1','1','1','2','2','2']]
        # yapf: enable
        cols = [
            'dob_month', 'var01', 'var02', 'var03', 'var04', 'var05', 'var06',
            'var07', 'var08', 'var09', 'var10', 'var11', 'var12']
        return pd.DataFrame.from_records(data, columns=cols)

    # Expected result shared by the three tests below
    @pytest.fixture
    def exp(self):
        return pd.DataFrame({
            'dob_month': [1, 2, 3, 9, 10, 11, 12],
            'var_younger': [True, True, True, False, False, False, False],
            'var_older': [False, False, False, True, True, True, True]},
            index=[0, 1, 2, 8, 9, 10, 11])

    def test_month_filter_mid(self, mdf, pl, exp):
        df = mdf._get_cohort_month_filter(
            pl=pl, var='var', values=['2'], year=2011, keep_vars=[])
        assert df.equals(exp)

    def test_month_filter_first(self, mdf, pl, exp):
        df = mdf._get_cohort_month_filter(
            pl=pl, var='var', values=['2'], year=2010, keep_vars=[])
        exp = exp.loc[exp['var_older'], ['dob_month', 'var_older']]
        assert df.equals(exp)

    def test_month_filter_last(self, mdf, pl, exp):
        df = mdf._get_cohort_month_filter(
            pl=pl, var='var', values=['2'], year=2012, keep_vars=[])
        exp = exp.loc[exp['var_younger'], ['dob_month', 'var_younger']]
        assert df.equals(exp)


class TestStrInKeepVars(object):
    """Tests for MedicareDF._str_in_keep_vars string/regex matching."""

    @pytest.fixture
    def mdf(self):
        return med.MedicareDF('01', 2012)

    @pytest.mark.parametrize(
        'instr,keep_vars,res',
        [('a', ['a', 'b', 'c'], True),
         ('d', ['a', 'b', 'c'], False),
         ('a', ['a', re.compile(r'b')], True),
         ('d', ['a', re.compile(r'b')], False),
         ('a', [re.compile(r'a')], True),
         ('a', [re.compile(r'b')], False)])  # yapf: disable
    def test_str_in_keep_vars(self, mdf, instr, keep_vars, res):
        assert res == mdf._str_in_keep_vars(instr, keep_vars)


class TestGetPattern(object):
    """Tests for MedicareDF._get_pattern normalization of str/regex."""

    @pytest.fixture
    def mdf(self):
        return med.MedicareDF('01', 2012)

    def test_get_pattern_str(self, mdf):
        assert mdf._get_pattern('string') == 'string'

    def test_get_pattern_regex(self, mdf):
        regex = re.compile('regex_match')
        assert mdf._get_pattern(regex) == 'regex_match'

    @pytest.mark.parametrize(
        'obj', [True, 1, 1.0, ['string'], [re.compile('regex')]])
    def test_get_pattern_invalid_type(self, mdf, obj):
        with pytest.raises(TypeError):
            mdf._get_pattern(obj)


class TestCreateRenameDict(object):
    """Tests for MedicareDF._create_rename_dict."""

    @pytest.fixture
    def mdf(self):
        return med.MedicareDF('01', 2012)

    @pytest.mark.parametrize('hcpcs,icd9_dx,icd9_sg,rename,expected', [
        (None, None, None, {}, {}),
        ('a', 'b', 'c',
         {'hcpcs': '1', 'icd9_dx': '2', 'icd9_sg': '3'},
         {'a': '1', 'b': '2', 'c': '3'}),
        ('a', 'b', 'c',
         {'hcpcs': ['1'], 'icd9_dx': ['2'], 'icd9_sg': ['3']},
         {'a': '1', 'b': '2', 'c': '3'}),
        (['a'], ['b'], ['c'],
         {'hcpcs': ['1'], 'icd9_dx': ['2'], 'icd9_sg': ['3']},
         {'a': '1', 'b': '2', 'c': '3'}),
        (['a'], ['b'], ['c'],
         {'hcpcs': {'a': '1'}, 'icd9_dx': {'b': '2'}, 'icd9_sg': {'c': '3'}},
         {'a': '1', 'b': '2', 'c': '3'}),
        (['a', 'd'], ['b', 'e'], ['c', 'f'],
         {'hcpcs': ['1', '4'], 'icd9_dx': ['2', '5'], 'icd9_sg': ['3', '6']},
         {'a': '1', 'b': '2', 'c': '3', 'd': '4', 'e': '5', 'f': '6'}),
        (['a', 'b'], ['c', 'd'], ['e', 'f'],
         {'hcpcs': {'a': '1'}, 'icd9_dx': {'c': '2'}, 'icd9_sg': {'e': '3'}},
         {'a': '1', 'c': '2', 'e': '3'}),
        ])  # yapf: disable
    def test_rename_dict_noerror(
            self, mdf, hcpcs, icd9_dx, icd9_sg, rename, expected):
        codes = {'hcpcs': hcpcs, 'icd9_dx': icd9_dx, 'icd9_sg': icd9_sg}
        result = mdf._create_rename_dict(codes=codes, rename=rename)
        assert result == expected

    @pytest.mark.parametrize('hcpcs,icd9_dx,icd9_sg,rename', [
        (None, None, None,
         {'hcpcs': ['1', '2'],
          'icd9_dx': ['2', '3'],
          'icd9_sg': ['3', '4']}),
        ('a',
'b', 'c', 397 | {'hcpcs': ['1', '2'], 398 | 'icd9_dx': ['2', '3'], 399 | 'icd9_sg': ['3', '4']}), 400 | ('a', 'b', 'c', {'hcpcs': [], 'icd9_dx': [], 'icd9_sg': []}), 401 | (['a', 'b'], ['c', 'd'], ['e', 'f'], 402 | {'hcpcs': [], 'icd9_dx': [], 'icd9_sg': []}), 403 | (['a', 'b'], ['c', 'd'], ['e', 'f'], 404 | {'hcpcs': '1', 'icd9_dx': '2', 'icd9_sg': '3'}), 405 | ]) # yapf: disable 406 | def test_rename_dict_wrong_list_len( 407 | self, mdf, hcpcs, icd9_dx, icd9_sg, rename): 408 | codes = {'hcpcs': hcpcs, 'icd9_dx': icd9_dx, 'icd9_sg': icd9_sg} 409 | with pytest.raises(AssertionError): 410 | mdf._create_rename_dict(codes=codes, rename=rename) 411 | 412 | @pytest.mark.parametrize('hcpcs,icd9_dx,icd9_sg,rename', [ 413 | (None, None, None, 414 | {'hcpcs': '1', 'icd9_dx': '2', 'icd9_sg': '3'}), 415 | ('a', 'b', 'c', 416 | {'hcpcs': {'a': '1', 'x': '5'}, 417 | 'icd9_dx': {'b': '2', 'y': '6'}, 418 | 'icd9_sg': {'c': '3', 'z': '7'}}), 419 | ]) # yapf: disable 420 | def test_rename_dict_wrong_dict_length( 421 | self, mdf, hcpcs, icd9_dx, icd9_sg, rename): 422 | codes = {'hcpcs': hcpcs, 'icd9_dx': icd9_dx, 'icd9_sg': icd9_sg} 423 | with pytest.raises(AssertionError): 424 | mdf._create_rename_dict(codes=codes, rename=rename) 425 | 426 | 427 | class TestSearchForCodesTypeCheck(object): 428 | @pytest.fixture 429 | def init(self): 430 | return { 431 | 'data_types': 'med', 432 | 'pl': None, 433 | 'hcpcs': None, 434 | 'icd9_dx': None, 435 | 'icd9_dx_max_cols': None, 436 | 'icd9_sg': None, 437 | 'icd9_sg_max_cols': None, 438 | 'keep_vars': {}, 439 | 'collapse_codes': True, 440 | 'rename': { 441 | 'hcpcs': None, 442 | 'icd9_dx': None, 443 | 'icd9_sg': None}, 444 | 'convert_ehic': True, 445 | 'dask': False, 446 | 'verbose': False} 447 | 448 | @pytest.fixture 449 | def mdf(self): 450 | return med.MedicareDF('01', 2012) 451 | 452 | @pytest.mark.parametrize( 453 | 'data_types,expected', 454 | [('carc', ['carc']), 455 | (['carc'], ['carc']), 456 | (['carc', 'carl', 'ipc', 'ipr', 'med', 
'opc', 'opr'], 457 | ['carc', 'carl', 'ipc', 'ipr', 'med', 'opc', 'opr']), 458 | ]) # yapf: disable 459 | def test_data_types(self, mdf, init, data_types, expected): 460 | init['data_types'] = data_types 461 | result = mdf._search_for_codes_type_check(**init) 462 | assert result.data_types == expected 463 | 464 | @pytest.mark.parametrize( 465 | 'data_types,error', 466 | [(None, TypeError), 467 | ('a', ValueError), 468 | ('sdb', ValueError), 469 | (1, TypeError)]) # yapf: disable 470 | def test_wrong_data_types(self, mdf, init, data_types, error): 471 | init['data_types'] = data_types 472 | with pytest.raises(error): 473 | mdf._search_for_codes_type_check(**init) 474 | 475 | @pytest.mark.parametrize( 476 | 'pl,error', 477 | [('a', TypeError), 478 | (1, TypeError), 479 | (pd.DataFrame({'a': [1, 2, 3, 4]}), ValueError)]) # yapf: disable 480 | def test_pl_error(self, mdf, init, pl, error): 481 | init['pl'] = pl 482 | with pytest.raises(error): 483 | mdf._search_for_codes_type_check(**init) 484 | 485 | @pytest.mark.parametrize( 486 | 'pl,res', 487 | [( 488 | pd.DataFrame({'bene_id': [1, 2, 3, 4]}), 489 | pd.DataFrame({'bene_id': [1, 2, 3, 4]}) 490 | ), ( 491 | pd.DataFrame({'bene_id': [1, 2, 3, 4], 'other': ['a', 'b', 'c', 'd']}), 492 | pd.DataFrame({'bene_id': [1, 2, 3, 4]}) 493 | ), ( 494 | pd.DataFrame({'bene_id': [1, 2, 3, 4], 'ehic': ['a', 'b', 'c', 'd']}), 495 | pd.DataFrame({'bene_id': [1, 2, 3, 4], 'ehic': ['a', 'b', 'c', 'd']}), 496 | ), ( 497 | pd.DataFrame({'bene_id': [1, 2, 3, 4], 'ehic': ['a', 'b', 'c', 'd'], 'other': ['a', 'b', 'c', 'd']}), 498 | pd.DataFrame({'bene_id': [1, 2, 3, 4], 'ehic': ['a', 'b', 'c', 'd']}), 499 | ), ( 500 | pd.DataFrame({'ehic': [1, 2, 3, 4], 'other': ['a', 'b', 'c', 'd']}), 501 | pd.DataFrame({'ehic': [1, 2, 3, 4]}) 502 | )]) # yapf: disable 503 | def test_pl(self, mdf, init, pl, res): 504 | init['pl'] = pl 505 | obj = mdf._search_for_codes_type_check(**init) 506 | pl = obj.pl_ids_to_filter 507 | for col in pl.columns: 508 | 
assert pl[col].equals(res[col]) 509 | 510 | @pytest.mark.parametrize( 511 | 'codes,error', 512 | [(1, TypeError), 513 | (1.1, TypeError), 514 | ([1], TypeError), 515 | ([1.1], TypeError), 516 | ([['a']], TypeError), 517 | (['a', ['b']], TypeError), 518 | ([[re.compile('a')]], TypeError), 519 | ([re.compile('a'), [re.compile('b')]], TypeError), 520 | ]) # yapf: disable 521 | def test_codes_error(self, mdf, init, codes, error): 522 | for x in ['hcpcs', 'icd9_dx', 'icd9_sg']: 523 | init[x] = codes 524 | with pytest.raises(error): 525 | mdf._search_for_codes_type_check(**init) 526 | 527 | @pytest.mark.parametrize( 528 | 'hcpcs,icd9_dx,icd9_sg,expected', 529 | [ 530 | (None, None, None, 531 | {'hcpcs': [], 532 | 'icd9_dx': [], 533 | 'icd9_sg': []}), 534 | ('a', 'a', 'a', 535 | {'hcpcs': ['a'], 536 | 'icd9_dx': ['a'], 537 | 'icd9_sg': ['a']}), 538 | (['a'], ['a'], ['a'], 539 | {'hcpcs': ['a'], 540 | 'icd9_dx': ['a'], 541 | 'icd9_sg': ['a']}), 542 | ('a', 'b', 'c', 543 | {'hcpcs': ['a'], 544 | 'icd9_dx': ['b'], 545 | 'icd9_sg': ['c']}), 546 | (['a'], ['b'], ['c'], 547 | {'hcpcs': ['a'], 548 | 'icd9_dx': ['b'], 549 | 'icd9_sg': ['c']}), 550 | ('', '', '', 551 | {'hcpcs': [''], 552 | 'icd9_dx': [''], 553 | 'icd9_sg': ['']}), 554 | ([''], [''], [''], 555 | {'hcpcs': [''], 556 | 'icd9_dx': [''], 557 | 'icd9_sg': ['']}), 558 | ('a', re.compile('b'), 'c', 559 | {'hcpcs': ['a'], 560 | 'icd9_dx': [re.compile('b')], 561 | 'icd9_sg': ['c']}), 562 | (re.compile('a'), re.compile('a'), re.compile('a'), 563 | {'hcpcs': [re.compile('a')], 564 | 'icd9_dx': [re.compile('a')], 565 | 'icd9_sg': [re.compile('a')]}), 566 | ([re.compile('a')], [re.compile('a')], [re.compile('a')], 567 | {'hcpcs': [re.compile('a')], 568 | 'icd9_dx': [re.compile('a')], 569 | 'icd9_sg': [re.compile('a')]}), 570 | ]) # yapf: disable 571 | def test_codes(self, mdf, init, hcpcs, icd9_dx, icd9_sg, expected): 572 | init['collapse_codes'] = True 573 | init['hcpcs'] = hcpcs 574 | init['icd9_dx'] = icd9_dx 575 | 
init['icd9_sg'] = icd9_sg 576 | result = mdf._search_for_codes_type_check(**init) 577 | assert result.codes == expected 578 | 579 | @pytest.mark.parametrize( 580 | 'hcpcs,icd9_dx,icd9_sg,error', 581 | [ 582 | ('a', 'a', None, ValueError), 583 | (None, 'a', 'a', ValueError), 584 | ('a', None, 'a', ValueError), 585 | (re.compile('a'), re.compile('a'), None, ValueError), 586 | (None, re.compile('a'), re.compile('a'), ValueError), 587 | (re.compile('a'), None, re.compile('a'), ValueError), 588 | (re.compile('a'), 'a', None, ValueError), 589 | (None, re.compile('a'), 'a', ValueError), 590 | (re.compile('a'), None, 'a', ValueError), 591 | ]) # yapf: disable 592 | def test_dup_code_patterns(self, mdf, init, hcpcs, icd9_dx, icd9_sg, error): 593 | init['collapse_codes'] = False 594 | init['hcpcs'] = hcpcs 595 | init['icd9_dx'] = icd9_dx 596 | init['icd9_sg'] = icd9_sg 597 | with pytest.raises(error): 598 | mdf._search_for_codes_type_check(**init) 599 | 600 | def test_icd9_dx_max_cols(self, mdf, init): 601 | init['icd9_dx'] = None 602 | init['icd9_dx_max_cols'] = 5 603 | with pytest.raises(ValueError): 604 | mdf._search_for_codes_type_check(**init) 605 | 606 | def test_icd9_sg_max_cols(self, mdf, init): 607 | init['icd9_sg'] = None 608 | init['icd9_sg_max_cols'] = 5 609 | with pytest.raises(ValueError): 610 | mdf._search_for_codes_type_check(**init) 611 | 612 | @pytest.mark.parametrize( 613 | 'value,error', 614 | [(1, TypeError), 615 | ('a', TypeError), 616 | ([1], TypeError), 617 | (True, TypeError), 618 | ({'invalid_key': 'string'}, ValueError), 619 | ({'med': 1}, TypeError), 620 | ({'med': True}, TypeError), 621 | ]) # yapf: disable 622 | def test_keep_vars_error(self, mdf, init, value, error): 623 | init['keep_vars'] = value 624 | with pytest.raises(error): 625 | mdf._search_for_codes_type_check(**init) 626 | 627 | @pytest.mark.parametrize( 628 | 'value,expected', 629 | [({'med': 'string'}, {'med': ['string']}), 630 | ({'med': ['string']}, {'med': ['string']})]) # yapf: 
disable 631 | def test_keep_vars(self, mdf, init, value, expected): 632 | init['keep_vars'] = value 633 | result = mdf._search_for_codes_type_check(**init) 634 | assert result.keep_vars == expected 635 | 636 | @pytest.mark.parametrize( 637 | 'hcpcs,icd9_dx,icd9_sg,rename', 638 | [(None, None, None, { 639 | 'wrongkey': ['new_name']}), 640 | ('a', 'b', 'c', { 641 | 'wrongkey': ['new_name']})]) 642 | # More `rename` tests in TestCreateRenameDict class 643 | def test_rename_dict_wrong_dict_key( 644 | self, mdf, init, hcpcs, icd9_dx, icd9_sg, rename): 645 | init['hcpcs'] = hcpcs 646 | init['icd9_dx'] = icd9_dx 647 | init['icd9_sg'] = icd9_sg 648 | init['rename'] = rename 649 | with pytest.raises(ValueError): 650 | mdf._search_for_codes_type_check(**init) 651 | 652 | @pytest.mark.parametrize( 653 | 'rename,error', 654 | [({'hcpcs': ['somevalue']}, ValueError), 655 | ({'icd9_dx': 'string'}, ValueError)]) # yapf: disable 656 | # Rename argument not allowed when collapse_codes is True 657 | def test_rename_collapse_codes_error(self, mdf, init, rename, error): 658 | init['collapse_codes'] = True 659 | init['rename'] = rename 660 | with pytest.raises(error): 661 | mdf._search_for_codes_type_check(**init) 662 | 663 | @pytest.mark.parametrize( 664 | 'value,var,error', 665 | [(1, 'collapse_codes', TypeError), 666 | ('a', 'collapse_codes', TypeError), 667 | ([True], 'collapse_codes', TypeError), 668 | (None, 'collapse_codes', TypeError), 669 | (1, 'convert_ehic', TypeError), 670 | ('a', 'convert_ehic', TypeError), 671 | ([True], 'convert_ehic', TypeError), 672 | (None, 'convert_ehic', TypeError), 673 | (1, 'verbose', TypeError), 674 | ('a', 'verbose', TypeError), 675 | ([True], 'verbose', TypeError), 676 | (None, 'verbose', TypeError), 677 | ]) # yapf: disable 678 | def test_bool_input_type_error(self, mdf, init, value, var, error): 679 | init[var] = value 680 | with pytest.raises(error): 681 | mdf._search_for_codes_type_check(**init) 682 | 683 | 684 | class 
TestSearchForCodesDfInner(object): 685 | @pytest.fixture 686 | def init(self): 687 | # yapf: disable 688 | # strings of random numbers between 10000 and 20000 689 | cl = [ 690 | ['a', 'a1', '12330', '11561', '16595', '19645', '12857'], 691 | ['a', 'a2', '19119', '15046', '11443', '10912', '12049'], 692 | ['a', 'a3', '11970', '11287', '15761', '18922', '17237'], 693 | ['d', 'd1', '12339', '13261', '16721', '16916', '14030'], 694 | ['d', 'd2', '17472', '12268', '16866', '19018', '15955'], 695 | ['d', 'd3', '19984', '12176', '15422', '17639', '15978'], 696 | ['g', 'g1', '14664', '16756', '17961', '11753', '14142'], 697 | ['h', 'h1', '17978', '17134', '19126', '15506', '19840'], 698 | ['h', 'h2', '19970', '14396', '10766', '13759', '16496'], 699 | ['h', 'h3', '10135', '19787', '15254', '16429', '19755'], 700 | ['k', 'k1', '14184', '14980', '11988', '19129', '15954'], 701 | ['l', 'l1', '18656', '16262', '17277', '14809', '13158'], 702 | ['l', 'l2', '12183', '17934', '14647', '16925', '10645'], 703 | ['l', 'l3', '16389', '15936', '15057', '11984', '16037'], 704 | ['o', 'o1', '17409', '13543', '10463', '12570', '14592'], 705 | ['p', 'p1', '14828', '11101', '18290', '15968', '10171'], 706 | ['q', 'q1', '15680', '10538', '16378', '18132', '15117'], 707 | ['r', 'r1', '19623', '17485', '11370', '18089', '14946'], 708 | ['r', 'r2', '12488', '13445', '16946', '11697', '17000'], 709 | ['r', 'r3', '12433', '15126', '16657', '10305', '13371'], 710 | ['r', 'r4', '14366', '11205', '18033', '15486', '10191'], 711 | ['v', 'v1', '19662', '16793', '18033', '10708', '17447'], 712 | ['v', 'v2', '13856', '16934', '19373', '13596', '19218'], 713 | ['v', 'v3', '18428', '12335', '14074', '15931', '12287'], 714 | ['v', 'v4', '11527', '16453', '15934', '11127', '19378'], 715 | ['z', 'z1', '13459', '17823', '15864', '19867', '11651'], 716 | ] 717 | # yapf: enable 718 | cols_toload = [ 719 | 'bene_id', 'medparid', 'dgnscd1', 'dgnscd2', 'dgnscd3', 'dgnscd4', 720 | 'dgnscd5'] 721 | cl = 
pd.DataFrame.from_records(cl, columns=cols_toload) 722 | cl = cl.set_index('bene_id') 723 | 724 | codes = {'icd9_dx': [re.compile(r'^12'), '18428']} 725 | 726 | cols = { 727 | 'cl_id': 'medparid', 728 | 'pl_id': 'bene_id', 729 | 'keep_vars': [], 730 | 'icd9_dx': ['dgnscd1', 'dgnscd2', 'dgnscd3', 'dgnscd4', 'dgnscd5']} 731 | 732 | return { 733 | 'cl': cl, 734 | 'codes': codes, 735 | 'cols': cols, 736 | 'year': 2012, 737 | 'keep_vars': [], 738 | 'rename': {}, 739 | 'collapse_codes': True, 740 | 'pl_ids_to_filter': None} 741 | 742 | @pytest.fixture 743 | def mdf(self): 744 | return med.MedicareDF('01', 2012) 745 | 746 | # yapf: disable 747 | @pytest.mark.parametrize( 748 | 'args,exp', 749 | [( 750 | {}, 751 | pd.Series( 752 | True, 753 | index=['a1', 'a2', 'd1', 'd2', 'd3', 'l2', 'o1', 'r2', 'r3', 754 | 'v3']) 755 | ), ( 756 | {'cols': {'cl_id': 'medparid', 757 | 'pl_id': 'bene_id', 758 | 'keep_vars': [], 759 | 'icd9_dx': ['dgnscd1']}}, 760 | pd.Series(True, index=['a1', 'd1', 'l2', 'r2', 'r3', 'v3']) 761 | ) 762 | ]) 763 | # yapf: enable 764 | def test_output_collapse_codes(self, mdf, init, args, exp): 765 | args = {**init, **args} 766 | 767 | df = mdf._search_for_codes_df_inner(**args) 768 | df = df[df['match']] 769 | assert df.set_index('medparid')['match'].equals(exp) 770 | 771 | # verbose 772 | --------------------------------------------------------------------------------