├── .github └── workflows │ └── main.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── parse.go ├── pipeline.go ├── root.go └── split.go ├── config.yaml ├── data ├── test_services.csv └── tic_500_shoppable_services.csv ├── go.mod ├── go.sum ├── in-network-rates_schema.json ├── main.go └── pkg └── mrfparse ├── cloud ├── blob.go └── blob_test.go ├── http ├── http.go ├── http_retry_after.go └── http_retry_after_test.go ├── models └── mrf.go ├── mrf ├── errors.go ├── in_network_rates.go ├── in_network_rates_test.go ├── parse.go ├── provider_list.go ├── provider_list_test.go ├── provider_references.go ├── provider_references_test.go ├── record_writer.go ├── root.go ├── root_test.go ├── services.go └── services_test.go ├── parquet ├── pq_writer_factory.go ├── pq_writer_factory_test.go └── writer.go ├── pipeline ├── framework.go ├── steps.go └── steps_test.go ├── split └── split.go └── utils ├── crypto.go ├── crypto_test.go ├── error.go ├── func.go ├── func_test.go ├── json.go ├── json_amd64.go ├── json_other.go ├── json_test.go ├── logger.go ├── logger_test.go └── timer.go /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Build and Test 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | 14 | build: 15 | strategy: 16 | matrix: 17 | arch: 18 | - "amd64" 19 | - "aarch64" 20 | 21 | name: "Build - ${{ matrix.arch }}" 22 | 23 | runs-on: ubuntu-latest 24 | steps: 25 | 26 | - name: Checkout - amd64 27 | if: matrix.arch == 'amd64' 28 | uses: actions/checkout@v3 29 | 30 | - name: Checkout - aarch64 31 | if: matrix.arch == 'aarch64' 32 | uses: actions/checkout@v3 33 | with: 34 | path: 'repo' 35 | 36 | - name: Go module cache - amd64 37 | if: 
matrix.arch == 'amd64' 38 | uses: actions/cache@v3 39 | with: 40 | path: | 41 | ~/.cache/go-build 42 | ~/go/pkg/mod 43 | key: ${{ runner.os }}-${{ matrix.arch }}-go-${{ hashFiles('**/go.sum') }} 44 | restore-keys: | 45 | ${{ runner.os }}-${{ matrix.arch }}-go- 46 | 47 | - name: Go module cache - aarch64 48 | if: matrix.arch == 'aarch64' 49 | uses: actions/cache@v3 50 | with: 51 | path: | 52 | ${{ github.workspace }}/go 53 | key: ${{ runner.os }}-${{ matrix.arch }}-go-${{ hashFiles('**/go.sum') }} 54 | restore-keys: | 55 | ${{ runner.os }}-${{ matrix.arch }}-go- 56 | 57 | - name: Set up Go - amd64 58 | if: matrix.arch == 'amd64' 59 | uses: actions/setup-go@v3 60 | with: 61 | go-version-file: 'go.mod' 62 | 63 | - name: Build - amd64 64 | if: matrix.arch == 'amd64' 65 | run: make 66 | 67 | - name: Run tests - amd64 68 | if: matrix.arch == 'amd64' 69 | run: make test 70 | 71 | - name: Run tests - aarch64 72 | if: matrix.arch == 'aarch64' 73 | uses: pguyot/arm-runner-action@v2 74 | id: arm-image 75 | with: 76 | cpu: 'cortex-a53' 77 | base_image: 'https://raspi.debian.net/tested/20230102_raspi_4_bullseye.img.xz' 78 | bind_mount_repository: true 79 | import_github_env: true 80 | image_additional_mb: 2000 81 | # language=Shell script 82 | commands: | 83 | echo '::group::Update and install OS dependencies' 84 | apt update \ 85 | && apt upgrade -y \ 86 | && apt install -y \ 87 | build-essential \ 88 | ca-certificates \ 89 | curl \ 90 | gcc \ 91 | openssl \ 92 | tar \ 93 | unzip \ 94 | wget 95 | echo '::endgroup::' 96 | 97 | echo '::group::Ensure go is installed' 98 | if ! 
[ -d "$(pwd)/go" ]; then 99 | wget -q https://go.dev/dl/go1.19.5.linux-arm64.tar.gz 100 | tar -C "$(pwd)/" -xzf go1.19.5.linux-arm64.tar.gz 101 | fi 102 | echo '::endgroup::' 103 | 104 | echo '::group::Setup GOPATH and PATH' 105 | export GOPATH="/$(pwd)/go" 106 | export PATH="$GOPATH/bin:$PATH" 107 | echo "GOPATH=$GOPATH" 108 | echo "PATH=$PATH" 109 | echo '::endgroup::' 110 | 111 | # move into repo dir 112 | cd repo \ 113 | && echo '::group::go build...' \ 114 | && go build \ 115 | && echo '::endgroup::' \ 116 | && echo '::group::go vet...' \ 117 | && go vet ./... \ 118 | && echo '::endgroup::' \ 119 | && echo '::group::go test...' \ 120 | && go test ./... \ 121 | && echo '::endgroup::' 122 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | out/ 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | 9 | *.prof 10 | 11 | .idea/ 12 | .aws/ 13 | aws/ 14 | 15 | # Test binary, built with `go test -c` 16 | *.test 17 | 18 | # Output of the go coverage tool, specifically when used with LiteIDE 19 | *.out 20 | 21 | # Dependency directories (remove the comment below to include it) 22 | vendor/ 23 | 24 | /tmp -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | FROM golang:1.19.12-bullseye AS build 3 | 4 | ARG TARGETARCH 5 | 6 | WORKDIR /app 7 | 8 | COPY go.mod . 9 | COPY go.sum . 10 | COPY main.go . 
11 | RUN go mod download 12 | 13 | COPY cmd cmd 14 | COPY pkg pkg 15 | 16 | RUN if [ "$TARGETARCH" = "amd64" ]; then \ 17 | GOARCH=amd64 GOAMD64=v3 go build -ldflags="-w -s" -o /mrfparse ; \ 18 | else \ 19 | go build -ldflags="-w -s" -o /mrfparse; \ 20 | fi 21 | 22 | FROM debian:bullseye-20230725-slim AS runtime 23 | 24 | RUN mkdir /app 25 | WORKDIR /app 26 | 27 | COPY --from=build /mrfparse . 28 | COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt 29 | 30 | COPY config.yaml . 31 | COPY data/tic_500_shoppable_services.csv services.csv 32 | 33 | ENTRYPOINT [ "/app/mrfparse" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GOCMD=go 2 | GOTEST=$(GOCMD) test 3 | GOVET=$(GOCMD) vet 4 | BINARY_NAME=mrfparse 5 | VERSION?=0.0.0 6 | SERVICE_PORT?= 7 | DOCKER_REGISTRY?= 8 | EXPORT_RESULT?=false # for CI please set EXPORT_RESULT to true 9 | 10 | BUILD_VAR= 11 | ARCH=$(shell uname -m) 12 | ifeq ($(ARCH),x86_64) 13 | BUILD_VAR+=GOARCH=amd64 GOAMD64=v3 14 | endif 15 | 16 | GREEN := $(shell tput -Txterm setaf 2) 17 | YELLOW := $(shell tput -Txterm setaf 3) 18 | WHITE := $(shell tput -Txterm setaf 7) 19 | CYAN := $(shell tput -Txterm setaf 6) 20 | RESET := $(shell tput -Txterm sgr0) 21 | 22 | .PHONY: all test build vendor 23 | 24 | all: build 25 | 26 | ## Build: 27 | build: ## Build your project 28 | mkdir -p ./out/bin 29 | GO111MODULE=on $(BUILD_VAR) $(GOCMD) build -o ./out/bin/$(BINARY_NAME) 30 | 31 | clean: ## Remove build related file 32 | rm -f 
$(BINARY_NAME) 33 | rm -f ./junit-report.xml checkstyle-report.xml ./coverage.xml ./profile.cov yamllint-checkstyle.xml 34 | 35 | watch: ## Run the code with cosmtrek/air to have automatic reload on changes 36 | $(eval PACKAGE_NAME=$(shell head -n 1 go.mod | cut -d ' ' -f2)) 37 | docker run -it --rm -w /go/src/$(PACKAGE_NAME) -v $(shell pwd):/go/src/$(PACKAGE_NAME) -p $(SERVICE_PORT):$(SERVICE_PORT) cosmtrek/air 38 | 39 | ## Test: 40 | test: ## Run the tests of the project 41 | ifeq ($(EXPORT_RESULT), true) 42 | GO111MODULE=off go get -u github.com/jstemmer/go-junit-report 43 | $(eval OUTPUT_OPTIONS = | tee /dev/tty | go-junit-report -set-exit-code > junit-report.xml) 44 | endif 45 | GOAMD64=v3 $(GOTEST) -v -parallel 4 -race ./... $(OUTPUT_OPTIONS) 46 | 47 | coverage: ## Run the tests of the project and export the coverage 48 | $(GOTEST) -cover -covermode=count -coverprofile=profile.cov ./... 49 | $(GOCMD) tool cover -func profile.cov 50 | ifeq ($(EXPORT_RESULT), true) 51 | GO111MODULE=off go get -u github.com/AlekSi/gocov-xml 52 | GO111MODULE=off go get -u github.com/axw/gocov/gocov 53 | gocov convert profile.cov | gocov-xml > coverage.xml 54 | endif 55 | 56 | ## Lint: 57 | lint: 58 | docker run --rm -v $(shell pwd):/app -w /app golangci/golangci-lint:latest-alpine golangci-lint run --deadline=65s 59 | 60 | ## Docker: 61 | docker-build: ## Use the dockerfile to build the container 62 | DOCKER_BUILDKIT=1 docker build --rm --tag $(BINARY_NAME) . 63 | 64 | docker-release: ## Release the container with tag latest and version 65 | docker tag $(BINARY_NAME) $(DOCKER_REGISTRY)$(BINARY_NAME):latest 66 | docker tag $(BINARY_NAME) $(DOCKER_REGISTRY)$(BINARY_NAME):$(VERSION) 67 | # Push the docker images 68 | docker push $(DOCKER_REGISTRY)$(BINARY_NAME):latest 69 | docker push $(DOCKER_REGISTRY)$(BINARY_NAME):$(VERSION) 70 | 71 | ## Help: 72 | help: ## Show this help. 
73 | @echo '' 74 | @echo 'Usage:' 75 | @echo ' ${YELLOW}make${RESET} ${GREEN}${RESET}' 76 | @echo '' 77 | @echo 'Targets:' 78 | @awk 'BEGIN {FS = ":.*?## "} { \ 79 | if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \ 80 | else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \ 81 | }' $(MAKEFILE_LIST) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build and Test](https://github.com/danielchalef/mrfparse/actions/workflows/main.yml/badge.svg)](https://github.com/danielchalef/mrfparse/actions/workflows/main.yml) 2 | 3 | **This repo is no longer maintained** 4 | 5 | # A Go parser for _Transparency in Coverage_ MRF files. 6 | `mrfparse` is a memory and CPU efficient parser for _Transparency in Coverage_ Machine Readable Format (MRF) files. The parser is designed to be easily containerized and scaled on modern cloud container platforms (and potentially cloud function infrastructure). 7 | 8 | `mrfparse` is fast: Parsing out pricing and providers for the CMS' _500 shoppable services_ from an 80GB Anthem _in-network-rates_ fileset in NDJSON format to parquet takes <5 minutes on a 12-core workstation with container memory limited to 6GB. Doing the same from the gzip compressed source file takes an additional ~5 minutes. 9 | 10 | Features: 11 | 12 | - Outputs to a parquet dataset, allowing easy ingestion into data warehouses and data lakes. 13 | - Supports reading from HTTP, and S3 / GS cloud storage, and writing to S3 / GS cloud storage buckets. 14 | - Filter for a subset of CPT/HCPCS service codes (provided as a simple CSV file). 15 | - Filters for only providers for whom pricing data is present in the MRF file, dropping extraneous provider data. 16 | - Supports reading Gzip compressed MRF files. 17 | - The output schema is designed to support ingestion into graph databases. 
18 | 19 | ## Background 20 | As of July 1, 2022, _The Centers for Medicare and Medicaid Services (CMS)_ mandated that most group health plans and issuers of group or individual health insurance (payers) [must post pricing information for covered items and services](https://www.cms.gov/healthplan-price-transparency/public-data). The data is available in a machine readable format (MRF) that is described in the [Transparency in Coverage](https://github.com/CMSgov/price-transparency-guide) Github repo. 21 | 22 | Working with MRF files is challenging: 23 | - Each payer's MRF dataset is tens to hundreds of terabytes of data and is updated monthly. No monthly deltas are available and individual JSON documents can be over 1TB in size. 24 | - Some payers have included provider data for providers for whom the MRF file does not have pricing data. That is, there are provider reference records where in_network rates are not present. 25 | - Some payers have provided pricing data for services that providers do not offer. 26 | 27 | ## Usage 28 | The following examples illustrate using the binary from a command line. 29 | 30 | 31 | Parse a gzipped MRF file hosted on a payer's website and output the parquet dataset to an S3 bucket 32 | ```bash 33 | mrfparse pipeline -i https://mrf.healthsparq.com/aetnacvs/inNetworkRates/2022-12-05_Innovation-Health-Plan-Inc.json.gz \ 34 | -o s3://mrfdata/staging/2022-12-05/aetnacvs/ \ 35 | -p 99 36 | ``` 37 | 38 | 39 | Parse a gzipped MRF file hosted in a Google Cloud Storage bucket and output the parquet dataset to the local filesystem. 40 | ```bash 41 | mrfparse pipeline -i gs://mrfdata/staging/2022-12-05_Innovation-Health-Plan-Inc.json.gz \ 42 | -o mrfdata/staging/2022-12-05/aetnacvs/ \ 43 | -p 99 44 | ``` 45 | 46 | `mrfparse` operates in several stages each of which can be executed independently. See `mrfparse --help` for more options. 
47 | 48 | ### Production Use 49 | It is strongly recommended that you use the containerized parser and run it on a cloud container platform, allowing many files to be parsed concurrently. The "all-in-one" `pipeline` is not recommended for production use. For more resilient data pipelines, it is recommended that you use something like Airflow to run each of the download, `split` and `parse` steps sequentially in a recoverable way. 50 | 51 | Additionally, see the note below regarding not using `mrfparse` on ARM64 processors in production. 52 | 53 | ## Requirements 54 | `mrfparse` makes extensive use of [`simdjson-go`](https://github.com/minio/simdjson-go) to parse MRF JSON documents. A CPU with both AVX2 and CLMUL instruction support is required (most modern Intel or AMD processors). Unfortunately, `simdjson-go` does not (yet) support ARM64 NEON. 55 | 56 | Other requirements: 57 | - 6GB of RAM (though I'd like to reduce this) 58 | - Adequate temporary storage for intermediate data files. 59 | 60 | ### Note on ARM Compatibility 61 | To enable local testing with non-amd64 CPUs, such as Apple's new M# series of machines, this utility makes use of the 62 | [fakesimdjson](https://github.com/kiwicom/fakesimdjson) package. When using this simdjson simulacrum parsing speed and 63 | efficiency will be drastically reduced. It is therefore _not_ recommended to use this on ARM-based machines in a 64 | production environment. 65 | 66 | ## Build and Installation 67 | Using `go install`: 68 | ```bash 69 | go install github.com/danielchalef/mrfparse@latest 70 | ``` 71 | 72 | Use the `Makefile` to build the binary or container. 73 | 74 | Build the binary 75 | ```bash 76 | make 77 | ``` 78 | 79 | Build the container 80 | ```bash 81 | make docker-build 82 | ``` 83 | 84 | Edit the `Makefile` to change the container registry and tag and then release to your registry: 85 | ```bash 86 | make docker-release 87 | ``` 88 | 89 | See `make help` for more options. 
90 | 91 | ## Configuration and Tuning 92 | 93 | ### Configuration via `config.yml` and environment variables 94 | 95 | A number of runtime options can be set via a `config.yml` file. The default location is `./config.yml`. The location can be changed via the `--config` flag. These options may also be set via environment variables prefixed with `MRF_`. 96 | ```yaml 97 | log: 98 | level: info 99 | services: 100 | file: services.csv 101 | writer: 102 | max_rows_per_file: 100_000_000 103 | filename_template: "_%04d.zstd.parquet" 104 | max_rows_per_group: 1_000_000 105 | tmp: 106 | path: /tmp 107 | pipeline: 108 | download_timeout: 20 # minutes 109 | ``` 110 | 111 | ### The `services` file 112 | `mrfparse` is designed to parse out only a selected list of services identified by CPT/HCPCS codes. This list of codes needs to be provided to `mrfparse` in the form of a simple `csv` file which may be on a local filesystem or hosted on S3/GS. 113 | 114 | Use either the `config.yaml` file or the `--services` flag to specify the location of the `services` file. The default location is `./services.csv`. A sample services file containing the CMS' _500 Shoppable Services_ may be found in the `data` folder in this repo. 115 | 116 | ### Tuning 117 | UPDATE: `jsplit` now makes use of pooled buffers and is much faster than it was when this was written. YMMV on the following. 118 | 119 | Splitting an MRF JSON document into NDJSON using `jsplit` takes time. `jsplit` makes heavy usage of the GC and can be sped up by setting a `GOGC` value far higher than the default of 100, at the expense of a non-linear increase in memory usage. 120 | 121 | ## Parquet Schema 122 | 123 | See the models in [`models/mrf.go`](pkg/mrfparse/models/mrf.go) for the parquet schema. 
124 | 125 | ## How the core parser works 126 | An MRF file is split into a set of JSON documents using a fork of [`jsplit`](https://github.com/dolthub/jsplit) that has been modified to support reading and writing to cloud storage and use as a Go module. `jsplit` generates a root document and set of `provider-reference` and `in-network-rates` files. These files are in NDJSON format, allowing them to be consumed memory efficiently. They are parsed line by line using [`simdjson-go`](https://github.com/minio/simdjson-go) and output to a parquet dataset. 127 | 128 | `in-network-rates` files are parsed first, allowing us to filter against our `services` list and build up a list of providers for whom we have pricing data. This provider list is then used to filter the `provider-reference` files. 129 | 130 | ## Status 131 | - Currently, only [in-network-rates](https://github.com/CMSgov/price-transparency-guide/tree/master/schemas/in-network-rates) files are supported. 132 | - Providers are identified by either their NPI number or EIN. No effort has been made to enrich the data with additional provider information (e.g. provider name, address, etc.). 133 | - The parser does not attempt to validate that a provider actually provides a specific service that the MRF file offers pricing for. 134 | - `mrfparse` is not a validating parser but does attempt to detect and report some errors in the MRF file. Note that payers _do_ deviate from the CMS' schema! 135 | - The parser has been extensively tested with Anthem and Aetna datasets. YMMV with other payers. 136 | 137 | Contributions and feedback are welcome. This was my first large-ish Go project. Please do let me know if you have any suggestions for improvement. 138 | 139 | ## Acknowledgments 140 | - [simdjson-go](https://github.com/minio/simdjson-go) 141 | - [jsplit](https://github.com/dolthub/jsplit) 142 | 143 | ## License 144 | This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details. 
145 | 146 | Copyright 2023 Daniel Chalef 147 | 148 | Licensed under the Apache License, Version 2.0 (the "License"); 149 | you may not use this file except in compliance with the License. 150 | You may obtain a copy of the License at 151 | 152 | http://www.apache.org/licenses/LICENSE-2.0 153 | 154 | Unless required by applicable law or agreed to in writing, software 155 | distributed under the License is distributed on an "AS IS" BASIS, 156 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 157 | See the License for the specific language governing permissions and 158 | limitations under the License. 159 | -------------------------------------------------------------------------------- /cmd/parse.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/mrf" 20 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | var servicesFile string 26 | 27 | // parseCmd represents the parse command 28 | var parseCmd = &cobra.Command{ 29 | Use: "parse", 30 | Short: "Parse in-network MRF files. Expects split NDJSON files as input.", 31 | Long: `Parse in-network MRF files. Expects split NDJSON files as input. 32 | 33 | parseMrf outputs a parquet fileset. 
See README for schema.`, 34 | Run: func(cmd *cobra.Command, args []string) { 35 | inputPath, err := cmd.Flags().GetString("input") 36 | utils.ExitOnError(err) 37 | 38 | outputPath, err := cmd.Flags().GetString("output") 39 | utils.ExitOnError(err) 40 | 41 | serviceFile, err := cmd.Flags().GetString("services") 42 | utils.ExitOnError(err) 43 | 44 | planID, err := cmd.Flags().GetInt64("planid") 45 | utils.ExitOnError(err) 46 | 47 | fn := func() { mrf.Parse(inputPath, outputPath, planID, serviceFile) } 48 | 49 | elapsed := utils.Timed(fn) 50 | log.Infof("Completed in %d seconds", elapsed) 51 | }, 52 | } 53 | 54 | func init() { 55 | rootCmd.AddCommand(parseCmd) 56 | 57 | parseCmd.Flags().StringP("input", "i", "", "input path to NDJSON files") 58 | err := parseCmd.MarkFlagRequired("input") 59 | utils.ExitOnError(err) 60 | 61 | parseCmd.Flags().StringP("output", "o", "", "output path for parsed MRF files in parquet format") 62 | err = parseCmd.MarkFlagRequired("output") 63 | utils.ExitOnError(err) 64 | 65 | parseCmd.Flags().StringVarP(&servicesFile, "services", "s", "", "path to a CSV file containing a list of CPT/HCPCS service codes to filter on") 66 | 67 | parseCmd.Flags().Int64P("planid", "p", -1, "the planid acquired from the index file") 68 | err = parseCmd.MarkFlagRequired("planid") 69 | utils.ExitOnError(err) 70 | } 71 | -------------------------------------------------------------------------------- /cmd/pipeline.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/pipeline" 20 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | // pipelineCmd represents the pipeline command 26 | var pipelineCmd = &cobra.Command{ 27 | Use: "pipeline", 28 | Short: "Parse in-network MRF files. Input is a single MRF JSON file. Output is a parquet fileset.", 29 | Long: `Parse in-network MRF files. Input is a single MRF JSON file. Output is a parquet fileset. 30 | 31 | - Input is a JSON MRF file. Can be located at a local, HTTP, S3, or GCS path. 32 | Supports GZIPed files. 33 | - Output is a fileset in parquet format. See README for schema. 34 | 35 | Requires a services file containing a list of CPT/HCPCS service codes to filter on. Typically, we'd use the CMS 500 Shoppable Services list. 
36 | 37 | Plan ID is acquired from the carrier's Index file.`, 38 | Run: func(cmd *cobra.Command, args []string) { 39 | inputPath, err := cmd.Flags().GetString("input") 40 | utils.ExitOnError(err) 41 | 42 | outputPath, err := cmd.Flags().GetString("output") 43 | utils.ExitOnError(err) 44 | 45 | serviceFile, err := cmd.Flags().GetString("services") 46 | utils.ExitOnError(err) 47 | 48 | planID, err := cmd.Flags().GetInt64("planid") 49 | utils.ExitOnError(err) 50 | 51 | p := pipeline.NewParsePipeline(inputPath, outputPath, serviceFile, planID) 52 | p.Run() 53 | }, 54 | } 55 | 56 | func init() { 57 | rootCmd.AddCommand(pipelineCmd) 58 | 59 | pipelineCmd.Flags().StringP("input", "i", "", "Input path to JSON MRF file. Can be a local, HTTP, S3, or GCS path. Supports GZIPed files.") 60 | err := pipelineCmd.MarkFlagRequired("input") 61 | utils.ExitOnError(err) 62 | 63 | pipelineCmd.Flags().StringP("output", "o", "", "Output path for parsed MRF fileset in parquet format") 64 | err = pipelineCmd.MarkFlagRequired("output") 65 | utils.ExitOnError(err) 66 | 67 | pipelineCmd.Flags().StringVarP(&servicesFile, "services", "s", "", "Path to a CSV file containing a list of CPT/HCPCS service codes to filter on") 68 | 69 | pipelineCmd.Flags().Int64P("planid", "p", -1, "The planid acquired from the index file") 70 | err = pipelineCmd.MarkFlagRequired("planid") 71 | utils.ExitOnError(err) 72 | } 73 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 20 | "os" 21 | "runtime/pprof" 22 | "strings" 23 | 24 | "github.com/sirupsen/logrus" 25 | "github.com/spf13/cobra" 26 | "github.com/spf13/viper" 27 | ) 28 | 29 | var ( 30 | log *logrus.Logger 31 | cfgFile string 32 | memProfileFile string 33 | cpuProfileFile string 34 | ) 35 | 36 | // rootCmd represents the base command when called without any subcommands 37 | var rootCmd = &cobra.Command{ 38 | Use: "mrfparse", 39 | Short: "A parser for Transparency in Coverage Machine Readable Format (MRF) files", 40 | Long: `MRFParse is a Go parser for Transparency in Coverage Machine Readable Format (MRF) files. 41 | 42 | The parser is designed to be memory and CPU efficient, and easily containerized. It will run on any modern cloud container platform (and potentially cloud function infrastructure). 
43 | 44 | Input and Output paths can be local filesystem paths or AWS S3 and Google Cloud Storage paths.`, 45 | PersistentPreRun: func(cmd *cobra.Command, args []string) { 46 | if cpuProfileFile != "" { 47 | f, err := os.Create(cpuProfileFile) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | if err := pprof.StartCPUProfile(f); err != nil { 52 | log.Fatal(err) 53 | } 54 | } 55 | }, 56 | PersistentPostRun: func(cmd *cobra.Command, args []string) { 57 | if cpuProfileFile != "" { 58 | pprof.StopCPUProfile() 59 | } 60 | if memProfileFile != "" { 61 | f, err := os.Create(memProfileFile) 62 | if err != nil { 63 | log.Fatal(err) 64 | } 65 | if err := pprof.WriteHeapProfile(f); err != nil { 66 | log.Fatal(err) 67 | } 68 | f.Close() 69 | } 70 | }, 71 | } 72 | 73 | // Execute adds all child commands to the root command and sets flags appropriately. 74 | // This is called by main.main(). It only needs to happen once to the rootCmd. 75 | func Execute() { 76 | err := rootCmd.Execute() 77 | 78 | if err != nil { 79 | os.Exit(1) 80 | } 81 | } 82 | 83 | func init() { 84 | cobra.OnInitialize(initConfig) 85 | 86 | rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default config.yaml)") 87 | rootCmd.PersistentFlags().StringVar(&memProfileFile, "memprofile", "", "Write memory profile to this file") 88 | rootCmd.PersistentFlags().StringVar(&cpuProfileFile, "cpuprofile", "", "Write CPU profile to this file") 89 | } 90 | 91 | // initConfig reads in config file and ENV variables if set. 92 | func initConfig() { 93 | if cfgFile != "" { 94 | // Use config file from the flag. 95 | viper.SetConfigFile(cfgFile) 96 | } else { 97 | viper.AddConfigPath(".") 98 | viper.SetConfigType("yaml") 99 | viper.SetConfigName("config") 100 | } 101 | 102 | viper.SetEnvPrefix("mrf") // ENV variables will be prefixed with MRF_ 103 | viper.SetEnvKeyReplacer(strings.NewReplacer(`.`, `_`)) // replaced nested . 
with _ 104 | viper.AutomaticEnv() // read in environment variables that match 105 | 106 | // Read the config file; a missing or unreadable config file is fatal. 107 | if err := viper.ReadInConfig(); err != nil { 108 | cobra.CheckErr(err) // log is still nil here (assigned below), so log.Fatal would panic; CheckErr prints the error and exits 1 109 | } 110 | 111 | // Initialize or update logger with level from ENV or config 112 | log = utils.GetLogger() 113 | } 114 | -------------------------------------------------------------------------------- /cmd/split.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/split" 20 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | // splitCmd represents the splitFile command 26 | var splitCmd = &cobra.Command{ 27 | Use: "split", 28 | Short: "Split JSON files.", 29 | Long: `Split JSON files into a root.json and a series of NDJSON files for each top-level array element. 
30 | 31 | The input JSON file can be gzipped and may be located on the local filesystem or in a S3/GCS bucket.`, 32 | Run: func(cmd *cobra.Command, args []string) { 33 | inputPath, err := cmd.Flags().GetString("input") 34 | utils.ExitOnError(err) 35 | 36 | outputPath, err := cmd.Flags().GetString("output") 37 | utils.ExitOnError(err) 38 | 39 | overwrite, err := cmd.Flags().GetBool("overwrite") 40 | utils.ExitOnError(err) 41 | 42 | fn := func() { split.File(inputPath, outputPath, overwrite) } 43 | 44 | elapsed := utils.Timed(fn) 45 | log.Infof("Completed in %d seconds", elapsed) 46 | }, 47 | } 48 | 49 | func init() { 50 | rootCmd.AddCommand(splitCmd) 51 | 52 | splitCmd.Flags().StringP("input", "i", "", "input path to JSON file.") 53 | err := splitCmd.MarkFlagRequired("input") 54 | utils.ExitOnError(err) 55 | 56 | splitCmd.Flags().StringP("output", "o", "", "output path for split NDJSON files") 57 | err = splitCmd.MarkFlagRequired("output") 58 | utils.ExitOnError(err) 59 | 60 | splitCmd.Flags().Bool("overwrite", false, "overwrite contents of output path if it exists") 61 | } 62 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | log: 2 | level: info 3 | services: 4 | file: services.csv 5 | writer: 6 | max_rows_per_file: 100_000_000 7 | filename_template: "_%04d.zstd.parquet" 8 | max_rows_per_group: 1_000_000 9 | tmp: 10 | path: /tmp 11 | pipeline: 12 | download_timeout: 20 # minutes -------------------------------------------------------------------------------- /data/test_services.csv: -------------------------------------------------------------------------------- 1 | billing_code,description,plain_language_description 2 | J0702,BETAMETHASONE ACET&SOD PHOSP,Injection to treat reaction to a drug 3 | J1745,INFLIXIMAB NOT BIOSIMIL 10MG,A biologic medication 4 | -------------------------------------------------------------------------------- 
/data/tic_500_shoppable_services.csv: -------------------------------------------------------------------------------- 1 | billing_code,description,plain_language_description 2 | J0702,BETAMETHASONE ACET&SOD PHOSP,Injection to treat reaction to a drug 3 | J1745,INFLIXIMAB NOT BIOSIMIL 10MG,A biologic medication 4 | G0102,Prostate cancer screening; digital rectal examination,Prostate cancer screening; digital rectal examination 5 | G0103,Prostate cancer screening; prostate specific antigen test (psa),Prostate cancer screening; prostate specific antigen test (psa) 6 | G0121,Colon ca scrn; not hi risk ind,Colorectal cancer screening; colonoscopy on individual not meeting criteria for high risk 7 | G0105,Colorectal ca scrn; hi risk ind,Colorectal cancer screening; colonoscopy on individual at high risk 8 | S0285,Cnslt before screen colonosc,Colonoscopy consultation performed prior to a screening colonoscopy procedure 9 | G0289,"Arthro, loose body + chondro","Arthroscopy, knee, surgical, for removal of loose body, foreign body, debridement/shaving of articular cartilage (chondroplasty) at the time of other surgical knee arthroscopy in a different compartment of the same knee" 10 | G0120,Colon ca scrn; barium enema,"Colorectal cancer screening; alternative to g0105, screening colonoscopy, barium enema" 11 | 460,SPINAL FUSION (POSTERIOR),Spinal fusion except cervical 12 | 470,KNEE REPLACEMENT,Major joint replacement or reattachment of lower extremity 13 | 473,SPINAL FUSION (ANTERIOR),Cervical spinal fusion 14 | 743,HYSTERECTOMY,Uterine and adnexa procedures for non-malignancy 15 | 1960,Anesthesia for vaginal delivery,Anesthesia for vaginal delivery 16 | 1961,Anesthesia for cesarean delivery,Anesthesia for cesarean delivery 17 | 1967,Anesthesia for labor during planned vaginal delivery,Anesthesia for labor during planned vaginal delivery 18 | 1968,Anesthesia for cesarean delivery following labor,Anesthesia for cesarean delivery following labor 19 | 10005,FNA W IMAGE,"Fine 
needle aspiration biopsy, including ultrasound guidance; first lesion" 20 | 10021,FNA W/O IMAGE,Fine Needle Aspiration Biopsy without imaging 21 | 10040,ACNE SURGERY,"Incision and Drainage Procedures on the Skin, Subcutaneous and Accessory Structures" 22 | 10060,DRAINAGE OF SKIN ABSCESS,Incision and drainage of abscess; simple or single and complex or multiple 23 | 10140,DRAINAGE OF HEMATOMA/FLUID,"Incision and drainage of hematoma, seroma or fluid collection" 24 | 10160,PUNCTURE DRAINAGE OF LESION,"Puncture aspiration of abscess, hematoma, bulla, or cyst" 25 | 11000,DEBRIDE INFECTED SKIN,Removal of infected skin 26 | 11056,TRIM SKIN LESIONS 2 TO 4,Paring or cutting of benign hyperkeratotic lesion 27 | 11102,BIOPSY SKIN LESION,"Tangential biopsy of skin (e.g., for example, shave, scoop, saucerize, curette); single lesion" 28 | 11103,BIOPSY SKIN ADD-ON,"Tangential biopsy of skin (e.g., for example, shave, scoop, saucerize, curette); each separate/additional lesion" 29 | 11200,REMOVAL OF SKIN TAGS 15 common or plantar warts 42 | 17250,CHEM CAUT OF GRANLTJ TISSUE,Chemical destruction of pre-cancerous lesions of the skin 43 | 17311,MOHS 1 STAGE H/N/HF/G,"Micrographic technique, including removal of all gross tumor, surgical excision of tissue specimens, mapping, color coding of specimens, microscopic examination of specimens" 44 | 19120,REMOVAL OF BREAST LESION,REMOVAL OF BREAST LESION 45 | 20550,INJ TENDON SHEATH/LIGAMENT,Injection of medication into a tendon or ligament 46 | 20551,INJ TENDON ORIGIN/INSERTION,Injection of medication into the tendon/ligament origin 47 | 20553,INJECT TRIGGER POINTS 3/>,Injection of medication into an area that triggers pain 48 | 20600,DRAIN/INJ JOINT/BURSA W/O US,Draining or injecting medication into a small joint/bursa without ultrasound 49 | 20605,DRAIN/INJ JOINT/BURSA W/O US,Draining or injecting medication into a large joint/bursa without ultrasound 50 | 20610,DRAIN/INJ JOINT/BURSA W/O US,Draining or injecting medication into a 
major joint/bursa without ultrasound 51 | 20612,ASPIRATE/INJ GANGLION CYST,Removal of fluid or injection of medication into a ganglion cyst 52 | 27440,Revision of knee joint,Repair of knee joint 53 | 27441,Revision of knee joint,Repair of knee joint 54 | 27442,Revision of knee joint,Repair of knee joint 55 | 27443,Revision of knee joint,Repair of knee joint 56 | 27445,Revision of knee joint,Repair of knee joint with hinged prosthesis 57 | 27446,Revision of knee joint,Repair of knee joint 58 | 28296,CORRECTION HALLUX VALGUS,"Under Repair, Revision, and/or Reconstruction Procedures on the Foot and Toes" 59 | 29826,Subacromial Decompression,Shaving of shoulder bone using an endoscope 60 | 29848,WRIST ENDOSCOPY/SURGERY,Carpal tunnel release 61 | 29880,KNEE ARTHROSCOPY/SURGERY,Surgery to remove of all or part of a torn meniscus in both medial and lateral compartments 62 | 29881,KNEE ARTHROSCOPY/SURGERY,Surgery to remove of all or part of a torn meniscus in one compartment 63 | 29888,KNEE ARTHROSCOPY/SURGERY,ACL reconstruction 64 | 30520,REPAIR OF NASAL SEPTUM,Repair procedures of the nose 65 | 31231,NASAL ENDOSCOPY DX,"Nasal endoscopy, diagnostic, unilateral or bilateral" 66 | 31237,NASAL/SINUS ENDOSCOPY SURG,"Surgical nasal/ sinus endoscopy with biopsy, polypectomy or debridement" 67 | 31575,DIAGNOSTIC LARYNGOSCOPY,"Flexible, fiberoptic diagnostic laryngoscopy" 68 | 36415,ROUTINE VENIPUNCTURE,Collection of venous blood by venipuncture 69 | 36471,NJX SCLRSNT MLT INCMPTNT VN,Injections to remove spider veins on the limbs or trunk 70 | 36475,ENDOVENOUS RF 1ST VEIN,Ablation of incompetent vein 71 | 36478,ENDOVENOUS LASER 1ST VEIN,Laser removal of incompetent vein 72 | 42820,REMOVE TONSILS AND ADENOIDS,Removal of tonsils and adenoid glands patient younger than age 12 73 | 42826,REMOVAL OF TONSILS,Primary or secondary removal of tonsils 74 | 42830,REMOVAL OF ADENOIDS,Primary removal of the adenoids 75 | 43235,EGD DIAGNOSTIC BRUSH WASH,"Diagnostic examination of esophagus, 
stomach, and/or upper small bowel using an endoscope" 76 | 43239,EGD BIOPSY SINGLE/MULTIPLE,"Biopsy of the esophagus, stomach, and/or upper small bowel using an endoscope" 77 | 43846,"Gastric restrictive procedure, with gastric bypass for morbid obesity; with small intestine reconstruction to limit absorption",Surgical procedure used for weight loss resulting in a partial removal of stomach 78 | 44388,Colonoscopy thru stoma spx,Diagnostic examination of large bowel using an endoscope which is inserted through abdominal opening 79 | 44389,Colonoscopy with biopsy,Biopsies of large bowel using an endoscope which is inserted through abdominal opening 80 | 44394,Colonoscopy w/snare,Removal of large bowel polyps or growths using an endoscope 81 | 45378,DIAGNOSTIC COLONOSCOPY,Diagnostic examination of large bowel using an endoscope 82 | 45379,Colonoscopy w/fb removal,Removal of foreign bodies in large bowel using an endoscope 83 | 45380,COLONOSCOPY AND BIOPSY,Biopsy of large bowel using an endoscope 84 | 45381,Colonoscopy submucous njx,Injections of large bowel using an endoscope 85 | 45382,Colonoscopy w/control bleed,Control of bleeding in large bowel using an endoscope 86 | 45384,Colonoscopy w/lesion removal,Removal of polyps or growths in large bowel using an endoscope 87 | 45385,COLONOSCOPY W/LESION REMOVAL,Removal of polyps or growths of large bowel using an endoscope 88 | 45386,Colonoscopy w/balloon dilat,Balloon dilation of large bowel using an endoscope 89 | 45388,Colonoscopy w/ablation,Destruction of large bowel growths using an endoscope 90 | 45390,Colonoscopy w/resection,Removal of large bowel tissue using an endoscope 91 | 45391,Colonoscopy w/endoscope us,Ultrasound examination of lower large bowel using an endoscope 92 | 45392,Colonoscopy w/endoscopic fnb,Ultrasound guided needle aspiration or biopsy of lower large bowel using an endoscope 93 | 45398,Colonoscopy w/band ligation,Tying of large bowel using an endoscope 94 | 47562,LAPAROSCOPIC 
CHOLECYSTECTOMY,Removal of gallbladder using an endoscope 95 | 47563,LAPARO CHOLECYSTECTOMY/GRAPH,Gallbladder removal with use of an x-ray exam of the bile ducts 96 | 49505,PRP I/HERN INIT REDUC >5 YR,Repair of groin hernia patient age 5 years or older 97 | 49585,RPR UMBIL HERN REDUC > 5 YR,Repair of umbilical hernia in patients over 5 years old 98 | 49650,LAP ING HERNIA REPAIR INIT,Inguinal hernia repair done by laparoscope 99 | 50590,FRAGMENTING OF KIDNEY STONE,Surgical procedures on the kidney to break up and remove kidney stones 100 | 51741,ELECTRO-UROFLOWMETRY FIRST,A diagnostic test used to measure the flow of urine 101 | 51798,US URINE CAPACITY MEASURE,Ultrasound of bladder to measure urine capacity 102 | 52000,CYSTOSCOPY,Procedure on the bladder 103 | 52310,CYSTOSCOPY AND TREATMENT,Removing an indwelling ureteral stent by cystoscopy 104 | 52332,CYSTOSCOPY AND TREATMENT,Ureteral stents inserted internally between the bladder and the kidney and will remain within the patient for a defined period of time 105 | 55250,EXCISION PROCEDURES ON THE VAS DEFERENS,Removal of sperm duct(s) 106 | 55700,Prostate biopsy,Biopsy of prostate gland 107 | 55866,Surgical Procedures on the Prostate,Surgical removal of prostate and surrounding lymph nodes using an endoscope 108 | 57022,Incision and drainage of vaginal blood accumulation following delivery,Incision and drainage of vaginal blood accumulation following delivery 109 | 57288,REPAIR BLADDER DEFECT,Replacement of sling to support the bladder 110 | 57454,BX/CURETT OF CERVIX W/SCOPE,Biopsy of cervix or uterus 111 | 58100,EXCISION PROCEDURES ON THE CORPUS UTERI,Biopsy of the lining of the uterus 112 | 58558,HYSTEROSCOPY BIOPSY,Surgical hysteroscopy with biopsy 113 | 58563,HYSTEROSCOPY ABLATION,Surgical procedure used to treat premenopausal abnormal uterine bleeding 114 | 58565,HYSTEROSCOPY STERILIZATION,Laparoscopic/Hysteroscopic Procedures on the uterus 115 | 58571,TLH W/T/O 250 G OR LESS,Laparoscopic hysterectomy 116 | 
58661,LAPAROSCOPY REMOVE ADNEXA,"Removal of either benign or malignant tissue from the uterus, ovaries, fallopian tubes, or any of the surrounding tissues using a laparoscope" 117 | 58662,LAPAROSCOPY EXCISE LESIONS,"Removal of lesions of the ovary, pelvic viscera, or peritoneal surface" 118 | 58671,LAPAROSCOPY TUBAL BLOCK,Laparoscopic tubal sterilization is surgery to block the fallopian tubes to prevent pregnancy 119 | 59000,AMNIOCENTESIS DIAGNOSTIC,Removal of amniotic fluid from the uterus for diagnostic purposes 120 | 59025,FETAL NON-STRESS TEST,A common prenatal test used to check on a baby's health. 121 | 59400,OBSTETRICAL CARE,Obstetrical pre- and postpartum care and vaginal delivery 122 | 59409,Vaginal delivery,Vaginal delivery 123 | 59410,Vaginal delivery with post-delivery care,Vaginal delivery with post-delivery care 124 | 59414,Vaginal delivery of placenta,Vaginal delivery of placenta 125 | 59425,Pre-delivery care 4-6 visits,Pre-delivery care 4-6 visits 126 | 59426,Pre-delivery care 7 or more visits,Pre-delivery care 7 or more visits 127 | 59510,CESAREAN DELIVERY,Cesarean delivery with pre- and post-delivery care 128 | 59514,Cesarean delivery,Cesarean delivery 129 | 59515,Cesarean delivery with post-delivery care,Cesarean delivery with post-delivery care 130 | 59610,VBAC DELIVERY,Vaginal delivery after prior cesarean delivery 131 | 59612,Vaginal delivery after prior cesarean delivery,Vaginal delivery after prior cesarean delivery 132 | 59614,Vaginal delivery after prior cesarean delivery with post-delivery care,Vaginal delivery after prior cesarean delivery with post-delivery care 133 | 62322,SPINAL INJECTION FOR PAIN MANAGEMENT,Injection of substance into spinal canal of lower back or sacrum using imaging guidance 134 | 62323,Injection of substance into spinal canal of lower back or sacrum using imaging guidance,Injection of substance into spinal canal of lower back or sacrum using imaging guidance 135 | 63030,LOW BACK DISK SURGERY,Surgical procedure to 
decompress a herniated vertebra 136 | 64483,Transforaminal Epidural Injection,Injections of anesthetic and/or steroid drug into lower or sacral spine nerve root using imaging guidance 137 | 64493,INJ PARAVERT F JNT L/S 1 LEV,Injection into lower back of nerve block using imaging guidance 138 | 64721,CARPAL TUNNEL SURGERY,Release of the transverse carpal ligament 139 | 66821,YAG capsulotomy surgery,Removal of recurring cataract in lens capsule using laser 140 | 66984,CATARACT SURG W/IOL 1 STAGE,Removal of cataract with insertion of lens 141 | 67028,INJECTION EYE DRUG,Injection of a pharmaceutical agent into the eye 142 | 69210,REMOVE IMPACTED EAR WAX,Removal of ear wax from one or both ears 143 | 69436,CREATE EARDRUM OPENING,Insertion of tubes into one or both ears 144 | 70450,CT HEAD/BRAIN W/O DYE,CT scan head or brain without dye 145 | 70486,CT MAXILLOFACIAL W/O DYE,CT Scan of the face and jaw without dye 146 | 70491,CT SOFT TISSUE NECK W/DYE,CT scan of neck with dye 147 | 70551,MRI BRAIN STEM W/O DYE,MRI of brain stem without dye 148 | 70553,MRI BRAIN STEM W/O & W/DYE,MRI scan of brain before and after contrast 149 | 71045,CHEST X-RAY,Single view 150 | 71046,CHEST X-RAY,"2 views, front and back" 151 | 71047,CHEST X-RAY,3 views 152 | 71048,CHEST X-RAY,4 or more views 153 | 71101,X-RAY EXAM UNILAT RIBS/CHEST,Radiologic examination of one side of the chest/ribs 154 | 71250,CT THORAX W/O DYE,CT scan of the thorax without dye 155 | 71260,CT THORAX W/DYE,CT scan of the thorax with dye 156 | 71275,CT ANGIOGRAPHY CHEST,Diagnostic Radiology (Diagnostic Imaging) Procedures of the Chest 157 | 72040,X-RAY EXAM NECK SPINE 2-3 VW,"Radiologic examination of the neck/spine, 2-3 views" 158 | 72050,X-RAY EXAM NECK SPINE 4/5VWS,"Radiologic examination of the neck/spine, 4-5 views" 159 | 72070,X-RAY EXAM THORAC SPINE 2VWS,"Radiologic examination of the middle spine, 2 views" 160 | 72072,X-RAY EXAM THORAC SPINE 3VWS,"Radiologic examination of the middle spine, 3 views" 161 | 
72100,X-RAY EXAM L-S SPINE 2/3 VWS,X-ray of the lower spine 2-3 views 162 | 72110,X-RAY EXAM L-2 SPINE 4/>VWS,"X-ray of lower and sacral spine, minimum of 4 views" 163 | 72131,CT LUMBAR SPINE W/O DYE,CT scan of lower spine without dye 164 | 72141,MRI NECK SPINE W/O DYE,MRI of the neck or spine without dye 165 | 72146,MRI CHEST SPINE W/O DYE,MRI of chest and spine without dye 166 | 72148,MRI LUMBAR SPINE W/O DYE,MRI scan of lower spinal canal 167 | 72156,MRI NECK SPINE W/O & W/DYE,MRI of neck/spine with and without dye 168 | 72157,MRI CHEST SPINE W/O & W/DYE,MRI of chest and spine with and without dye 169 | 72158,MRI LUMBAR SPINE W/O & W/DYE,MRI of lower back with and without dye 170 | 72170,X-RAY EXAM OF PELVIS,Radiologic examination of the pelvis 171 | 72192,CT PELVIS W/O DYE,CT of pelvis without dye 172 | 72193,CT PELVIS W/DYE,"CT scan, pelvis, with contrast" 173 | 72195,MRI PELVIS W/O DYE,MRI of pelvis without dye 174 | 72197,MRI PELVIS W/O & W/DYE,MRI of pelvis before and after dye 175 | 73000,X-RAY EXAM OF COLLAR BONE,Radiologic examination of the collar bone 176 | 73030,X-RAY EXAM OF SHOULDER,Radiologic examination of the shoulder 177 | 73070,X-RAY EXAM OF ELBOW,"Radiologic examination, elbow; 2 views" 178 | 73080,X-RAY EXAM OF ELBOW,"Radiologic examination, elbow; 3 or more views" 179 | 73090,X-RAY EXAM OF FOREARM,Radiologic examination of the forearm 180 | 73100,X-RAY EXAM OF WRIST,3 or more views 181 | 73110,X-RAY EXAM OF WRIST,Up to 3 views 182 | 73120,X-RAY EXAM OF HAND,X-ray of the hand with 2 views 183 | 73130,X-RAY EXAM OF HAND,X-ray of the hand with 3 or more views 184 | 73140,X-RAY EXAM OF FINGER(S),Radiologic examination of the finger(s) 185 | 73221,MRI JOINT UPR EXTREM W/O DYE,MRI of upper extremity without dye 186 | 73560,X-RAY EXAM OF KNEE 1 OR 2,Radiologic examination of the knee with 1 or 2 views 187 | 73562,X-RAY EXAM OF KNEE 3,Radiologic examination of the knee with 3 views 188 | 73564,X-RAY EXAM KNEE 4 OR MORE,Radiologic examination of the 
knee with 4 or more views 189 | 73565,X-RAY EXAM OF KNEES,Radiologic examination of both knees 190 | 73590,X-RAY EXAM OF LOWER LEG,Radiologic examination of the lower leg 191 | 73600,X-RAY EXAM OF ANKLE,Radiologic examination of the ankle with 2 views 192 | 73610,X-RAY EXAM OF ANKLE,Radiologic examination of the ankle with 3 views 193 | 73620,X-RAY EXAM OF FOOT,"Radiologic examination, foot; 2 views" 194 | 73630,X-RAY EXAM OF FOOT,Radiologic examination of the foot with 3 or more views 195 | 73650,X-RAY EXAM OF HEEL,Radiologic examination of the heel 196 | 73660,X-RAY EXAM OF TOE(S),Radiologic examination of the toe(s) 197 | 73700,CT LOWER EXTREMITY W/O DYE,CT scan of leg without dye 198 | 73718,MRI LOWER EXTREMITY W/O DYE,MRI of leg without dye 199 | 73721,MRI JNT OF LWR EXTRE W/O DYE,MRI of lower extremity joint (knee/ankle) without dye 200 | 73722,MRI JOINT OF LWR EXTR W/DYE,MRI of lower extremity joint (knee/ankle) with dye 201 | 73723,MRI JOINT LWR EXTR W/O&W/DYE,MRI of lower extremity joint (knee/ankle) with and without dye 202 | 74022,X-RAY EXAM SERIES ABDOMEN,Serial radiologic examination of the abdomen 203 | 74150,CT ABDOMEN W/O DYE,CT of abdomen without dye 204 | 74160,CT ABDOMEN W/DYE,CT of abdomen with dye 205 | 74170,CT ABDOMEN W/O & W/DYE,CT of abdomen with and without dye 206 | 74176,CT ABD & PELVIS W/O CONTRAST,CT of abdomen and pelvis without dye 207 | 74177,CT ABD & PELV W/CONTRAST,CT scan of abdomen and pelvis with contrast 208 | 74178,CT ABD & PELV 1/> REGNS,"Computed tomography, abdomen and pelvis; without contrast material in one or both body regions, followed by contrast material(s) and further sections in one or both body regions" 209 | 74181,MRI ABDOMEN W/O DYE,MRI of abdomen without dye 210 | 74183,MRI ABDOMEN W/O & W/DYE,MRI of abdomen without and with dye 211 | 76000,CHEST X-RAY,"Flouroscopy, or x-ray ""movie"" that takes less than an hour" 212 | 76512,OPHTH US B W/NON-QUANT A,Ultrasound of the eye 213 | 76514,ECHO EXAM OF EYE 
THICKNESS,A diagnostic procedure that allows a provider to see the organs and other structures in the abdomen 214 | 76536,US EXAM OF HEAD AND NECK,Ultrasound of head and neck 215 | 76642,ULTRASOUND BREAST LIMITED,Limited ultrasound of the breast 216 | 76700,US EXAM ABDOM COMPLETE,Ultrasound of abdomen with all areas scanned 217 | 76705,ECHO EXAM OF ABDOMEN,A diagnostic procedure that allows a provider to see the organs and other structures in the abdomen 218 | 76770,US EXAM ABDO BACK WALL COMP,Ultrasound of back wall of the abdomen with all areas viewed 219 | 76775,US EXAM ABDO BACK WALL LIM,Ultrasound of back wall of the abdomen with limited areas viewed 220 | 76801,OB US < 14 WKS SINGLE FETUS,Abdominal ultrasound of pregnant uterus (less than 14 weeks) single or first fetus 221 | 76805,OB US >/= 14 WKS SNGL FETUS,Abdominal ultrasound of pregnant uterus (greater or equal to 14 weeks 0 days) single or first fetus 222 | 76811,OB US DETAILED SNGL FETUS,Ultrasound of single fetus 223 | 76813,OB US NUCHAL MEAS 1 GEST,Evaluation through measurement of fetal nuchal translucency 224 | 76815,OB US LIMITED FETUS(S),Ultrasound of fetus with limited views 225 | 76817,TRANSVAGINAL US OBSTETRIC,Transvaginal ultrasound of uterus 226 | 76818,FETAL BIOPHYS PROFILE W/NST,Fetal biophysical profile with non-stress test 227 | 76819,FETAL BIOPHYS PROFIL W/O NST,Fetal biophysical profile without non-stress test 228 | 76830,TRANSVAGINAL US NON-OB,Ultrasound of the pelvis through vagina 229 | 76831,ECHO EXAM UTERUS,A diagnostic procedure that allows a provider to see the uterus 230 | 76856,US EXAM PELVIC COMPLETE,Complete ultrasound of the pelvis 231 | 76857,US EXAM PELVIC LIMITED,Limited ultrasound of the pelvis 232 | 76870,US EXAM SCROTUM,Ultrasound of the scrotum 233 | 76872,US TRANSRECTAL,Transrectal ultrasound 234 | 76882,US LMTD JT/NONVASC XTR STRUX,"Diagnostic ultrasound of an extremity excluding the bone, joints or vessels" 235 | 77047,MRI BOTH BREASTS,"Magnetic resonance imaging, 
breasts, without contrast material; bilateral" 236 | 77065,DX MAMMO INCL CAD UNI,Mammography of one breast 237 | 77066,DX MAMMO INCL CAD BI,Mammography of both breasts 238 | 77067,SCR MAMMO BI INCL CAD,Mammography of both breasts-2 or more views 239 | 77080,BONE DENSITY STUDY OF SPINE OR PELVIS,Scan to measure bone mineral density (BMD) at the spine and hip 240 | 77385,Ntsty modul rad tx dlvr smpl,Radiation therapy delivery 241 | 77386,Ntsty modul rad tx dlvr cplx,Radiation therapy delivery 242 | 77387,Guidance for radia tx dlvr,Guidance for localization of target delivery of radiation treatment delivery 243 | 77412,Radiation treatment delivery,Radiation treatment delivery 244 | 78014,THYROID IMAGING W/BLOOD FLOW,Scan using a radioactive medication (radiopharmaceutical) to take pictures or images of the thyroid gland. 245 | 78306,BONE IMAGING WHOLE BODY,"A procedure most commonly ordered to detect areas of abnormal bone growth due to fractures, tumors, infection, or other bone issues" 246 | 78452,HT MUSCLE IMAGE SPECT MULT,Image of the heart to assess perfusion 247 | 78815,PET IMAGE W/CT SKULL-THIGH,"Tumor imaging, positron emission tomography (PET) with concurrently acquired computed tomography (CT) for attenuation correction and anatomical localization" 248 | 80048,METABOLIC PANEL TOTAL CA,Basic metabolic panel 249 | 80050,GENERAL HEALTH PANEL,General health panel 250 | 80051,"Blood test panel for electrolytes (sodium potassium, chloride, carbon dioxide)","Blood test panel for electrolytes (sodium potassium, chloride, carbon dioxide)" 251 | 80053,COMPREHEN METABOLIC PANEL,"Blood test, comprehensive group of blood chemicals" 252 | 80055,OBSTETRIC PANEL,Obstetric blood test panel 253 | 80061,LIPID PANEL,"Blood test, lipids (cholesterol and triglycerides)" 254 | 80069,RENAL FUNCTION PANEL,Kidney function panel test 255 | 80074,ACUTE HEPATITIS PANEL,Acute hepatitis panel 256 | 80076,HEPATIC FUNCTION PANEL,Liver function blood test panel 257 | 80081,"Blood test panel 
for obstetrics (cbc, differential wbc count, hepatitis b, hiv, rubella, syphilis, antibody screening, rbc, blood typing)","Blood test panel for obstetrics (cbc, differential wbc count, hepatitis b, hiv, rubella, syphilis, antibody screening, rbc, blood typing)" 258 | 80197,ASSAY OF TACROLIMUS,Test is used to measure the amount of the drug in the blood to determine whether the concentration has reached a therapeutic level and is below the toxic level 259 | 80307,Drug test prsmv chem anlyzr,Testing for presence of drug 260 | 81000,URINALYSIS NONAUTO W/SCOPE,Manual urinalysis test with examination using microscope 261 | 81001,URINALYSIS; MANUAL OR AUTO WITH OR WITHOUT MICROSCOPY,Manual urinalysis test with examination with or without using microscope 262 | 81002,URINALYSIS NONAUTO W/O SCOPE,Manual urinalysis test with examination without using microscope 263 | 81003,URINALYSIS; MANUAL OR AUTO WITH OR WITHOUT MICROSCOPY,Automated urinalysis test 264 | 81025,URINE PREGNANCY TEST,Urine pregnancy test 265 | 82043,UR ALBUMIN QUANTITATIVE,Urine test to measure albumin 266 | 82044,UR ALBUMIN SEMIQUANTITATIVE,Urine test to measure albumin-semiquantitative 267 | 82248,BILIRUBIN DIRECT,Measurement of direct bilirubin 268 | 82306,VITAMIN D 25 HYDROXY,Blood test to monitor vitamin D levels 269 | 82553,CREATINE MB FRACTION,Blood test to detect heart enzymes 270 | 82570,ASSAY OF URINE CREATININE,Test to measure creatinine in the urine 271 | 82607,VITAMIN B-12,Blood test to measure B-12 272 | 82627,DEHYDROEPIANDROSTERONE,Blood test to measure an enzyme in the blood 273 | 82670,ASSAY OF ESTRADIOL,Blood test to measure a type of estrogen in the blood 274 | 82728,ASSAY OF FERRITIN,Test to determine level of iron in the blood 275 | 82784,ASSAY IGA/IGD/IGG/IGM EACH,Test to determine levels of immunoglobulins in the blood 276 | 82803,BLOOD GASES ANY COMBINATION,Test to measure arterial blood gases 277 | 82947,ASSAY GLUCOSE BLOOD QUANT,Quantitative measure of glucose build up in the blood 
over time 278 | 82950,GLUCOSE TEST,Test of glucose level in the blood 279 | 82951,GLUCOSE TOLERANCE TEST,Test to predict likelihood of gestational diabetes 280 | 83001,ASSAY OF GONADOTROPIN (FSH),Test of hormone in the blood 281 | 83002,ASSAY OF GONADOTROPIN (LH),Test of hormone in the blood 282 | 83013,H PYLORI (C-13) BREATH,Test of breath for a stomach bacterium 283 | 83036,GLYCOSYLATED HEMOGLOBIN TEST,Blood test to measure average blood glucose levels for past 2-3 months 284 | 83516,IMMUNOASSAY NONANTIBODY,Chemical test of the blood to measure presence or concentration of a substance in the blood 285 | 83540,ASSAY OF IRON,Blood test to measure the amount of iron that is in transit in the body 286 | 83550,IRON BINDING TEST,Blood test that measures the amount of iron carried in the blood 287 | 83655,ASSAY OF LEAD,Blood test to determine the concentration of lead in the blood 288 | 83718,ASSAY OF LIPOPROTEIN,Blood test to measure the level of lipoproteins in the blood 289 | 83880,ASSAY OF NATRIURETIC PEPTIDE,Blood test used to diagnose heart failure 290 | 84134,ASSAY OF PREALBUMIN,Blood test to measure level of prealbumin 291 | 84153,ASSAY OF PSA TOTAL,PSA (prostate specific antigen) 292 | 84154,PSA (prostate specific antigen) measurement,PSA (prostate specific antigen) measurement 293 | 84436,ASSAY OF TOTAL THYROXINE,Blood test to measure a type of thyroid hormone 294 | 84439,ASSAY OF FREE THYROXINE,Blood test to evaluate thyroid function 295 | 84443,ASSAY THYROID STIM HORMONE,"Blood test, thyroid stimulating hormone (TSH)" 296 | 84460,ALANINE AMINO (ALT) (SGPT),Blood test to evaluate liver function 297 | 84480,ASSAY TRIIODOTHYRONINE (T3),Blood test to evaluate thyroid function 298 | 84484,ASSAY OF TROPONIN QUANT,Blood test to measure a certain protein in the blood to determine heart muscle damage 299 | 84703,CHORIONIC GONADOTROPIN ASSAY,Blood test to assess for pregnancy 300 | 85007,BL SMEAR W/DIFF WBC COUNT,Blood test to assess for infection 301 | 
85018,HEMOGLOBIN,Blood test to measure levels of hemoglobin 302 | 85025,COMPLETE CBC W/AUTO DIFF WBC,"Complete blood cell count, with differential white blood cells, automated" 303 | 85027,COMPLETE CBC AUTOMATED,"Complete blood count, automated" 304 | 85610,PROTHROMBIN TIME,"Blood test, clotting time" 305 | 85730,THROMBOPLASTIN TIME PARTIAL,Coagulation assessment blood test 306 | 86039,ANTINUCLEAR ANTIBODIES (ANA),Blood test to determine autoimmune disorders 307 | 86147,CARDIOLIPIN ANTIBODY EA IG,Blood test to determine cause of inappropriate blood clot formation 308 | 86200,CCP ANTIBODY,Blood test to diagnose rheumatoid arthritis 309 | 86300,IMMUNOASSAY TUMOR CA 15-3,Blood test to monitor breast cancer 310 | 86304,IMMUNOASSAY TUMOR CA 125,Blood test to monitor for cancer 311 | 86336,INHIBIN A,Blood test to monitor for cancer in the ovaries or testis 312 | 86592,SYPHILIS TEST NON-TREP QUAL,Blood test to screen for syphilis 313 | 86644,CMV ANTIBODY,Blood test to monitor for cytomegalovirus 314 | 86665,EPSTEIN-BARR CAPSID VCA,Blood test to diagnose mononucleosis 315 | 86677,HELICOBACTER PYLORI ANTIBODY,Blood test to determine if peptic ulcers are caused by a certain bacterium 316 | 86703,HIV-1/HIV-2 1 RESULT ANTBDY,Blood test to diagnose HIV 317 | 86704,HEP B CORE ANTIBODY TOTAL,Blood test indicating infection with Hepatitis B 318 | 86708,HEPATITIS A ANTIBODY,Blood test indicating infection with Hepatitis A 319 | 86762,RUBELLA ANTIBODY,Blood test to determine if antibodies exist for rubella 320 | 86765,RUBEOLA ANTIBODY,Blood test to determine if antibodies exist for measles 321 | 86780,TREPONEMA PALLIDUM,Blood test to determine existence of certain bacterium that causes syphilis 322 | 86803,HEPATITIS C AB TEST,Blood test to determine infection with Hepatitis C 323 | 86850,RBC ANTIBODY SCREEN,Blood test to screen for antibodies that could harm red blood cells 324 | 87040,BLOOD CULTURE FOR BACTERIA,Blood test to screen for bacteria in the blood 325 | 87046,STOOL CULTR AEROBIC
BACT EA,Blood test to identify bacteria that may be contributing to symptoms in the gastrointestinal tract 326 | 87070,CULTURE OTHR SPECIMN AEROBIC,Test of body fluid other than blood to assess for bacteria 327 | 87077,CULTURE AEROBIC IDENTIFY,Test of a wound for type of bacterial infection 328 | 87081,CULTURE SCREEN ONLY,Medical test to find an infection 329 | 87086,URINE CULTURE/COLONY COUNT,Culture of the urine to determine number of bacteria 330 | 87088,URINE BACTERIA CULTURE,Culture of the urine to determine bacterial infection 331 | 87101,SKIN FUNGI CULTURE,A procedure used to determine if fungi are present in an area of the body 332 | 87186,MICROBE SUSCEPTIBLE MIC,A test used to determine which medications work on bacteria or fungi 333 | 87205,SMEAR GRAM STAIN,A lab test used to detect bacteria or fungi in a sample taken from the site of a suspected infection 334 | 87210,SMEAR WET MOUNT SALINE/INK,A lab test to screen for evidence of vaginal infection 335 | 87324,CLOSTRIDIUM AG IA,A test of the stool to diagnose Clostridium difficile (C.
diff) infection 336 | 87389,HIV-1 AG W/HIV-1 & HIV-2 AB,Test for HIV 337 | 87491,CHYLMD TRACH DNA AMP PROBE,Test that detects Chlamydia 338 | 87510,GARDNER VAG DNA DIR PROBE,Blood test for vaginitis 339 | 87591,N.GONORRHOEAE DNA AMP PROB,Blood test for an STD 340 | 87624,Hpv high-risk types,Detection test for human papillomavirus (hpv) 341 | 87653,STREP B DNA AMP PROBE,Blood test for strep infection 342 | 87661,TRICHOMONAS VAGINALIS AMPLIF,Blood test for an STD 343 | 87801,DETECT AGNT MULT DNA AMPLI,Blood test to determine genetic material of certain infectious agents 344 | 87804,INFLUENZA ASSAY W/OPTIC,Flu test 345 | 87807,RSV ASSAY W/OPTIC,Test for RSV 346 | 87880,STREP A ASSAY W/OPTIC,Test for strep A 347 | 88112,CYTOPATH CELL ENHANCE TECH,Urine test 348 | 88141,CYTOPATH C/V INTERPRET,Cervical cancer screening test with interpretation 349 | 88142,CYTOPATH C/V THIN LAYER,PAP smear 350 | 88150,CYTOPATH C/V MANUAL,Cervical cancer screening test done manually 351 | 88175,CYTOPATH C/V AUTO FLUID REDO,PAP smear 352 | 88305,TISSUE EXAM BY PATHOLOGIST,Test of tissues for diagnosis of abnormalities 353 | 88312,SPECIAL STAINS GROUP 1,Blood test to assist with diagnosis 354 | 88313,SPECIAL STAINS GROUP 2,Blood test to assist with diagnosis 355 | 88342,IMMUNOHISTO ANTB 1ST STAIN,Pathology test 356 | 90460,IM ADMIN 1ST/ONLY COMPONENT,Immunization administration in children <18 357 | 90471,IMMUNIZATION ADMIN,Immunization administration by a medical assistant or nurse 358 | 90474,IMMUNE ADMIN ORAL/NASAL ADDL,Immunization administered orally or nasally 359 | 90632,HEPA VACCINE ADULT IM,Hepatitis A vaccination for adults 360 | 90633,HEPA VACC PED/ADOL 2 DOSE IM,Hepatitis A vaccination for adolescents and children 361 | 90649,4VHPV VACCINE 3 DOSE IM,3-dose HPV vaccination 362 | 90656,IIV3 VACC NO PRSV 0.5 ML IM,Flu shot-high dose for 2019-2020 flu season given by injection 363 | 90658,IIV3 VACCINE SPLT 0.5 ML IM,Preservative free flu vaccine 364 | 90672,LAIV4 VACCINE 
INTRANASAL,Nasal flu vaccine 365 | 90681,RV1 VACC 2 DOSE LIVE ORAL,Rotavirus vaccination 366 | 90686,IIV4 VACC NO PRSV 0.5 ML IM,Flu shot-high dose for 2019-2020 flu season given by injection for people >65 367 | 90707,MMR VACCINE SC,"Measles, mumps and rubella vaccine" 368 | 90710,MMRV VACCINE SC,"Measles, mumps, rubella and varicella vaccine" 369 | 90715,TDAP VACCINE 7 YRS/> IM,"Diphtheria, tetanus acellular, and pertussis vaccine for adults" 370 | 90716,VAR VACCINE LIVE SUBQ,Varicella vaccine 371 | 90732,PPSV23 VACC 2 YRS+ SUBQ/IM,Pneumococcal vaccine 372 | 90734,MENACWYD/MENACWYCRM VACC IM,Meningococcal conjugate vaccine 373 | 90736,HZV VACCINE LIVE SUBQ,Shingles vaccine 374 | 90746,HEPB VACCINE 3 DOSE ADULT IM,Hepatitis B vaccine 375 | 90791,PSYCH DIAGNOSTIC EVALUATION,"A diagnostic tool employed by a psychiatrist to diagnose problems with memory, thought processes, and behaviors" 376 | 90792,PSYCH DIAG EVAL W/MED SRVCS,A diagnostic tool employed by a psychiatrist to determine if medications are needed 377 | 90832,PSYTX W PT 30 MINUTES,"Psychotherapy, 30 min" 378 | 90833,PSYTX W PT W E/M 30 MIN,"Psychotherapy, 30 minutes with patient when performed with an evaluation and management service" 379 | 90834,PSYTX W PT 45 MINUTES,"Psychotherapy, 45 min" 380 | 90836,PSYTX W PT W E/M 45 MIN,"Psychotherapy, 45 minutes with patient when performed with an evaluation and management service" 381 | 90837,PSYTX W PT 60 MINUTES,"Psychotherapy, 60 min" 382 | 90838,"Psychotherapy, 60 minutes","Psychotherapy, 60 minutes" 383 | 90839,"Psychotherapy for crisis, first 60 minutes","Psychotherapy for crisis, first 60 minutes" 384 | 90840,Psychotherapy for crisis,Psychotherapy for crisis 385 | 90846,"Family psychotherapy, 50 minutes","Family psychotherapy, not including patient, 50 min" 386 | 90847,FAMILY PSYTX W/PT 50 MIN,"Family psychotherapy, including patient, 50 min" 387 | 90853,GROUP PSYCHOTHERAPY,Group psychotherapy 388 | 92002,EYE EXAM NEW
PATIENT,Intermediate exam 389 | 92004,EYE EXAM NEW PATIENT,Complete exam 390 | 92012,EYE EXAM ESTABLISH PATIENT,Eye exam on an established patient 391 | 92014,EYE EXAM&TX ESTAB PT 1/>VST,Eye exam and treatment for established patient 392 | 92083,VISUAL FIELD EXAMINATION(S),An eye examination that can detect dysfunction in central and peripheral vision 393 | 92133,CMPTR OPHTH IMG OPTIC NERVE,Optic nerve imaging 394 | 92507,SPEECH/HEARING THERAPY,Therapy for speech or hearing 395 | 92523,SPEECH SOUND LANG COMPREHEN,Evaluation of speech sound production with evaluation of language comprehension 396 | 92552,PURE TONE AUDIOMETRY AIR,Type of hearing test 397 | 93000,ELECTROCARDIOGRAM COMPLETE,Routine EKG using at least 12 leads including interpretation and report 398 | 93015,CARDIOVASCULAR STRESS TEST,Test to determine heart abnormalities 399 | 93303,ECHO TRANSTHORACIC,Test to screen the heart for abnormalities 400 | 93306,Tte w/doppler complete,"Ultrasound examination of heart including color-depicted blood flow rate, direction, and valve function" 401 | 93307,TTE W/O DOPPLER COMPLETE,Echo without doppler study 402 | 93320,DOPPLER ECHO EXAM HEART,Echo with doppler 403 | 93350,STRESS TTE ONLY,Stress test with echocardiogram 404 | 93452,Cardiac Catheterization,Insertion of catheter into left heart for diagnosis 405 | 93798,CARDIAC REHAB/MONITOR,Use of EKG to monitor cardiac rehabilitation 406 | 93880,EXTRACRANIAL BILAT STUDY,Study of vessels on both sides of the head and neck 407 | 93922,UPR/L XTREMITY ART 2 LEVELS,Limited bilateral noninvasive physiologic studies of upper or lower extremity arteries 408 | 93970,EXTREMITY STUDY,Complete bilateral study of the extremities 409 | 93971,EXTREMITY STUDY,One sided or limited bilateral study 410 | 94010,BREATHING CAPACITY TEST,Test to determine how well oxygen moves from the lungs to the blood stream 411 | 94060,EVALUATION OF WHEEZING,Test to determine if wheezing is present 412 | 94375,RESPIRATORY FLOW VOLUME LOOP,Graphical 
representation of inspiration and expiration 413 | 94726,PULM FUNCT TST PLETHYSMOGRAP,Measures how much air is in the lungs after taking a deep breath 414 | 94727,PULM FUNCTION TEST BY GAS,Measure of lung function and gas exchange 415 | 94729,CO/MEMBANE DIFFUSE CAPACITY,Test to measure how well gases diffuse across lung surfaces 416 | 95004,PERCUT ALLERGY SKIN TESTS,Allergy test 417 | 95115,IMMUNOTHERAPY ONE INJECTION,Allergy shot-1 shot 418 | 95117,IMMUNOTHERAPY INJECTIONS,Multiple allergy shots 419 | 95810,POLYSOM 6/> YRS 4/> PARAM,Sleep monitoring of patient (6 years or older) in sleep lab 420 | 95811,POLYSOM 6/>YRS CPAP 4/> PARM,Sleep monitoring of patient (6 years or older) in sleep lab using CPAP 421 | 95860,MUSCLE TEST ONE LIMB,Test to measure electrical activity of muscles or nerves in 1 limb 422 | 95861,MUSCLE TEST 2 LIMBS,Test to measure electrical activity of muscles or nerves in 2 limbs 423 | 95886,MUSC TEST DONE W/N TEST COMP,Test to assess for nerve damage 424 | 96110,DEVELOPMENTAL SCREEN W/SCORE,Childhood test to screen for developmental disabilities 425 | 96365,THER/PROPH/DIAG IV INF INIT,"Intravenous infusion, for therapy, prophylaxis, or diagnosis-initial infusion" 426 | 96366,THER/PROPH/DIAG IV INF ADDON,"Intravenous infusion, for therapy, prophylaxis, or diagnosis-additional infusions" 427 | 96374,THER/PROPH/DIAG INJ IV PUSH,"Intravenous infusion, for therapy, prophylaxis, or diagnosis-IV push" 428 | 96375,TX/PRO/DX INJ NEW DRUG ADDON,"Intravenous infusion, for treatment, prophylaxis, or diagnosis-new drug add on" 429 | 96376,TX/PRO/DX INJ SAME DRUG ADON,"Intravenous infusion, for treatment, prophylaxis, or diagnosis-same drug add on" 430 | 96415,CHEMO IV INFUSION ADDL HR,Chemotherapy infusion-each additional hour 431 | 96417,CHEMO IV INFUS EACH ADDL SEQ,Chemotherapy infusion-additional IV pushes of the same medication 432 | 97010,HOT OR COLD PACKS THERAPY,Use of external hot or cold packs 433 | 97012,MECHANICAL TRACTION THERAPY,Form of
decompression therapy of the spine 434 | 97014,ELECTRIC STIMULATION THERAPY,One time use unattended 435 | 97016,VASOPNEUMATIC DEVICE THERAPY,"Machines designed to pump cold water into an inflatable wrap or brace, compressing the enveloped area of the body" 436 | 97026,INFRARED THERAPY,Light-based method to treat pain and inflammation 437 | 97032,ELECTRICAL STIMULATION,Repeated application to one or more parts of the body 438 | 97033,ELECTRIC CURRENT THERAPY,Psychiatric treatment in which seizures are electrically induced in patients to provide relief from mental disorders 439 | 97035,ULTRASOUND THERAPY,"Use of sound waves to treat medical problems, especially musculoskeletal problems like inflammation from injuries" 440 | 97110,THERAPEUTIC EXERCISES,"Therapeutic exercise to develop strength, endurance, range of motion, and flexibility, each 15 minutes" 441 | 97112,NEUROMUSCULAR REEDUCATION,A technique used by physical therapists to restore normal body movement patterns 442 | 97113,AQUATIC THERAPY/EXERCISES,Use of water for therapy/exercises 443 | 97116,GAIT TRAINING THERAPY,A type of physical therapy 444 | 97124,MASSAGE THERAPY,Use of massage 445 | 97140,MANUAL THERAPY 1/> REGIONS,Manipulation of 1 or more regions of the body 446 | 97530,THERAPEUTIC ACTIVITIES,"Incorporates the use of multiple parameters, such as balance, strength, and range of motion, for a functional activity" 447 | 97535,SELF CARE MNGMENT TRAINING,Occupational therapy 448 | 97597,RMVL DEVITAL TIS 20 CM/<,"Debridement (for example, high pressure waterjet with/without suction, sharp selective debridement with scissors, scalpel and forceps)" 449 | 97811,ACUPUNCT W/O STIMUL ADDL 15M,Acupuncture without stimulation 450 | 97813,ACUPUNCT W/STIMUL 15 MIN,Acupuncture with stimulation 451 | 98940,CHIROPRACT MANJ 1-2 REGIONS,Chiropractic manipulation in 1-2 regions 452 | 98941,CHIROPRACT MANJ 3-4 REGIONS,Chiropractic manipulation in 3-4 regions 453 | 98943,CHIROPRACT MANJ XTRSPINL 1/>,Chiropractic 
manipulation not of the spine 454 | 98966,Hc pro phone call 5-10 min,"Telephone assessment and management service, 5-10 minutes of medical discussion" 455 | 98967,Hc pro phone call 11-20 min,"Telephone assessment and management service, 11-20 minutes of medical discussion" 456 | 98968,Hc pro phone call 21-30 min,"Telephone assessment and management service, 21-30 minutes of medical discussion" 457 | 98970,Qualified non physician health care professional online digital assessment and management est. patient 5-10 minutes,"Qualified non physician health care professional online digital assessment and management, for an established patient, for up to 7 days, cumulative time during the 7 days; 5-10 minutes" 458 | 98971,Qualified non physician health care professional online digital assessment and management est. patient 11-20 minutes,"Qualified non physician health care professional online digital assessment and management, for an established patient, for up to 7 days, cumulative time during the 7 days; 11-20 minutes" 459 | 98972,Qualified non physician health care professional online digital assessment and management for est. 
patients 21+ minutes,"Qualified non physician health care professional online digital assessment and management, for an established patient, for up to 7 days, cumulative time during the 7 days; 21 or more minutes" 460 | 99051,MED SERV EVE/WKEND/HOLIDAY,Medical service during off-hours 461 | 99173,VISUAL ACUITY SCREEN,Eye test 462 | 99202,OFFICE/OUTPATIENT VISIT NEW,"New patient office or other outpatient visit, typically 20 minutes" 463 | 99203,OFFICE/OUTPATIENT VISIT NEW,"New patient office or other outpatient visit, typically 30 min" 464 | 99204,OFFICE/OUTPATIENT VISIT NEW,"New patient office or other outpatient visit, typically 45 min" 465 | 99205,OFFICE/OUTPATIENT VISIT NEW,"New patient office or other outpatient visit, typically 60 min" 466 | 99211,OFFICE/OUTPATIENT VISIT EST,Outpatient visit of established patient not requiring a physician 467 | 99212,OFFICE/OUTPATIENT VISIT EST,Outpatient visit of established patient requiring a physician 468 | 99213,OFFICE/OUTPATIENT VISIT EST,"Established patient office or other outpatient visit, typically 15 minutes" 469 | 99214,OFFICE/OUTPATIENT VISIT EST,"Established patient office or other outpatient visit, typically 25 minutes" 470 | 99215,OFFICE/OUTPATIENT VISIT EST,"Established patient office or other outpatient visit, typically 40 minutes" 471 | 99243,OFFICE CONSULTATION,"Patient office consultation, typically 40 min" 472 | 99244,OFFICE CONSULTATION,"Patient office consultation, typically 60 min" 473 | 99283,Emergency dept visit,"Emergency department visit, moderately severe problem" 474 | 99284,Emergency dept visit,"Emergency department visit, problem of high severity" 475 | 99285,Emergency dept visit,"Emergency department visit, problem with significant threat to life or function" 476 | 99381,INIT PM E/M NEW PAT INFANT,Initial visit for an infant 477 | 99382,INIT PM E/M NEW PAT 1-4 YRS,Initial visit for new patients 1-4 years old 478 | 99383,PREV VISIT NEW AGE 5-11,New preventative visit in new patients 5-11
years old 479 | 99384,PREV VISIT NEW AGE 12-17,New preventative visit in new patients 12-17 years old 480 | 99385,PREV VISIT NEW AGE 18-39,Initial new patient preventive medicine evaluation (18–39 years) 481 | 99386,PREV VISIT NEW AGE 40-64,Initial new patient preventive medicine evaluation (40–64 years) 482 | 99387,INIT PM E/M NEW PAT 65+ YRS,Initial visit for new patients 65 and older years old 483 | 99391,PER PM REEVAL EST PAT INFANT,Periodic primary re-evaluation for an established infant patient 484 | 99392,PREV VISIT EST AGE 1-4,Initial visit for new patients 1-4 years old 485 | 99393,PREV VISIT EST AGE 5-11,New preventative visit in new patients 5-11 years old 486 | 99394,PREV VISIT EST AGE 12-17,New preventative visit in new patients 12-17 years old 487 | 99395,PREV VISIT EST AGE 18-39,Established patient periodic preventive medicine examination age 18-39 years 488 | 99396,PREV VISIT EST AGE 40-64,Established patient periodic preventive medicine examination age 40-64 years 489 | 99397,PER PM REEVAL EST PAT 65+ YR,Periodic primary re-evaluation for an established patient 65 and older 490 | 99421,ONLINE DIGITAL EVALUATION AND MANAGEMENT SERVICE; 5-10 MINUTES,"Online digital evaluation and management service, for an established patient, for up to 7 days, cumulative time during the 7 days; 5-10 minutes" 491 | 99422,Online digital evaluation and management service; 11-20 minutes,"Online digital evaluation and management service, for an established patient, for up to 7 days, cumulative time during the 7 days; 11-20 minutes" 492 | 99441,Phone e/m phys/qhp 5-10 min,"Physician telephone patient service, 5-10 minutes of medical discussion" 493 | 99442,Phone e/m phys/qhp 11-20 min,"Physician telephone patient service, 11-20 minutes of medical discussion" 494 | 99443,Phone e/m phys/qhp 21-30 min,"Physician telephone patient service, 21-30 minutes of medical discussion" -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/danielchalef/mrfparse 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/danielchalef/jsplit v0.0.2 7 | github.com/minio/simdjson-go v0.4.2 8 | ) 9 | 10 | require ( 11 | github.com/aws/aws-sdk-go-v2 v1.17.3 // indirect 12 | github.com/aws/aws-sdk-go-v2/config v1.18.8 // indirect 13 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.47 // indirect 14 | github.com/aws/aws-sdk-go-v2/service/s3 v1.30.0 // indirect 15 | github.com/deckarep/golang-set/v2 v2.1.0 16 | github.com/segmentio/parquet-go v0.0.0-20230106170957-952b1613a191 17 | ) 18 | 19 | require ( 20 | cloud.google.com/go v0.108.0 // indirect 21 | cloud.google.com/go/compute v1.15.0 // indirect 22 | cloud.google.com/go/compute/metadata v0.2.3 // indirect 23 | cloud.google.com/go/iam v0.10.0 // indirect 24 | cloud.google.com/go/storage v1.28.1 // indirect 25 | github.com/alecthomas/repr v0.1.0 // indirect 26 | github.com/andybalholm/brotli v1.0.4 // indirect 27 | github.com/aws/aws-sdk-go v1.44.175 // indirect 28 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect 29 | github.com/aws/aws-sdk-go-v2/credentials v1.13.8 // indirect 30 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.21 // indirect 31 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.27 // indirect 32 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.21 // indirect 33 | github.com/aws/aws-sdk-go-v2/internal/ini v1.3.28 // indirect 34 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.18 // indirect 35 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect 36 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.22 // indirect 37 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.21 // indirect 38 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.21 // indirect 39 | github.com/aws/aws-sdk-go-v2/service/sso v1.12.0 // indirect 40 | 
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.0 // indirect 41 | github.com/aws/aws-sdk-go-v2/service/sts v1.18.0 // indirect 42 | github.com/aws/smithy-go v1.13.5 // indirect 43 | github.com/davecgh/go-spew v1.1.1 // indirect 44 | github.com/fsnotify/fsnotify v1.6.0 // indirect 45 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 46 | github.com/golang/protobuf v1.5.2 // indirect 47 | github.com/google/go-cmp v0.5.9 // indirect 48 | github.com/google/uuid v1.3.0 // indirect 49 | github.com/google/wire v0.5.0 // indirect 50 | github.com/googleapis/enterprise-certificate-proxy v0.2.1 // indirect 51 | github.com/googleapis/gax-go/v2 v2.7.0 // indirect 52 | github.com/hashicorp/hcl v1.0.0 // indirect 53 | github.com/hexops/gotextdiff v1.0.3 // indirect 54 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 55 | github.com/jmespath/go-jmespath v0.4.0 // indirect 56 | github.com/libp2p/go-buffer-pool v0.1.0 // indirect 57 | github.com/magiconair/properties v1.8.7 // indirect 58 | github.com/mattn/go-runewidth v0.0.14 // indirect 59 | github.com/mitchellh/mapstructure v1.5.0 // indirect 60 | github.com/olekukonko/tablewriter v0.0.5 // indirect 61 | github.com/pelletier/go-toml v1.9.5 // indirect 62 | github.com/pelletier/go-toml/v2 v2.0.6 // indirect 63 | github.com/pierrec/lz4/v4 v4.1.17 // indirect 64 | github.com/pmezard/go-difflib v1.0.0 // indirect 65 | github.com/rivo/uniseg v0.4.3 // indirect 66 | github.com/segmentio/encoding v0.3.6 // indirect 67 | github.com/spf13/afero v1.9.3 // indirect 68 | github.com/spf13/cast v1.5.0 // indirect 69 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 70 | github.com/spf13/pflag v1.0.5 // indirect 71 | github.com/subosito/gotenv v1.4.1 // indirect 72 | go.opencensus.io v0.24.0 // indirect 73 | golang.org/x/net v0.7.0 // indirect 74 | golang.org/x/oauth2 v0.4.0 // indirect 75 | golang.org/x/text v0.7.0 // indirect 76 | golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // 
indirect 77 | google.golang.org/api v0.106.0 // indirect 78 | google.golang.org/appengine v1.6.7 // indirect 79 | google.golang.org/genproto v0.0.0-20230106154932-a12b697841d9 // indirect 80 | google.golang.org/grpc v1.51.0 // indirect 81 | google.golang.org/protobuf v1.28.1 // indirect 82 | gopkg.in/ini.v1 v1.67.0 // indirect 83 | gopkg.in/yaml.v2 v2.4.0 // indirect 84 | gopkg.in/yaml.v3 v3.0.1 // indirect 85 | ) 86 | 87 | require ( 88 | github.com/alecthomas/assert/v2 v2.1.0 89 | github.com/alitto/pond v1.8.2 90 | github.com/avast/retry-go/v4 v4.3.2 91 | github.com/kiwicom/fakesimdjson v0.0.0-20230125075857-80f4b896a785 92 | github.com/rs/xid v1.4.0 93 | github.com/spf13/cobra v1.6.1 94 | github.com/spf13/viper v1.14.0 95 | github.com/stretchr/testify v1.8.1 96 | gocloud.dev v0.27.0 97 | golang.org/x/exp v0.0.0-20221217163422-3c43f8badb15 98 | ) 99 | 100 | require ( 101 | github.com/klauspost/compress v1.15.15 // indirect 102 | github.com/klauspost/cpuid/v2 v2.2.3 // indirect 103 | github.com/sirupsen/logrus v1.9.0 104 | golang.org/x/sys v0.5.0 // indirect 105 | ) 106 | -------------------------------------------------------------------------------- /in-network-rates_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "definitions": { 4 | "in_network": { 5 | "type": "object", 6 | "properties": { 7 | "negotiation_arrangement": { 8 | "enum": ["ffs", "bundle", "capitation"] 9 | }, 10 | "name": { 11 | "type": "string" 12 | }, 13 | "billing_code_type": { 14 | "enum": [ 15 | "CPT", 16 | "HCPCS", 17 | "ICD", 18 | "MS-DRG", 19 | "R-DRG", 20 | "S-DRG", 21 | "APS-DRG", 22 | "AP-DRG", 23 | "APR-DRG", 24 | "APC", 25 | "NDC", 26 | "HIPPS", 27 | "LOCAL", 28 | "EAPG", 29 | "CDT", 30 | "RC", 31 | "CSTM-ALL" 32 | ] 33 | }, 34 | "billing_code_type_version": { 35 | "type": "string" 36 | }, 37 | "billing_code": { 38 | "type": "string" 39 | }, 40 | "description": { 41 | 
"type": "string" 42 | }, 43 | "negotiated_rates": { 44 | "type": "array", 45 | "items": { 46 | "$ref": "#/definitions/negotiated_rates" 47 | }, 48 | "default": [] 49 | }, 50 | "covered_services": { 51 | "type": "array", 52 | "items": { 53 | "$ref": "#/definitions/covered_services" 54 | }, 55 | "default": [] 56 | }, 57 | "bundled_codes": { 58 | "type": "array", 59 | "items": { 60 | "$ref": "#/definitions/bundled_codes" 61 | }, 62 | "default": [] 63 | } 64 | }, 65 | "required": [ 66 | "negotiation_arrangement", 67 | "name", 68 | "billing_code_type", 69 | "billing_code_type_version", 70 | "billing_code", 71 | "negotiated_rates", 72 | "description" 73 | ] 74 | }, 75 | "bundled_codes": { 76 | "type": "object", 77 | "properties": { 78 | "billing_code_type": { 79 | "enum": [ 80 | "CPT", 81 | "HCPCS", 82 | "ICD", 83 | "MS-DRG", 84 | "R-DRG", 85 | "S-DRG", 86 | "APS-DRG", 87 | "AP-DRG", 88 | "APR-DRG", 89 | "APC", 90 | "NDC", 91 | "HIPPS", 92 | "LOCAL", 93 | "EAPG", 94 | "CDT", 95 | "RC", 96 | "CSTM-ALL" 97 | ] 98 | }, 99 | "billing_code_type_version": { 100 | "type": "string" 101 | }, 102 | "billing_code": { 103 | "type": "string" 104 | }, 105 | "description": { 106 | "type": "string" 107 | } 108 | }, 109 | "required": [ 110 | "billing_code_type", 111 | "billing_code_type_version", 112 | "billing_code", 113 | "description" 114 | ] 115 | }, 116 | "covered_services": { 117 | "type": "object", 118 | "properties": { 119 | "billing_code_type": { 120 | "enum": [ 121 | "CPT", 122 | "HCPCS", 123 | "ICD", 124 | "MS-DRG", 125 | "R-DRG", 126 | "S-DRG", 127 | "APS-DRG", 128 | "AP-DRG", 129 | "APR-DRG", 130 | "APC", 131 | "NDC", 132 | "HIPPS", 133 | "LOCAL", 134 | "EAPG", 135 | "CDT", 136 | "RC", 137 | "CSTM-ALL" 138 | ] 139 | }, 140 | "billing_code_type_version": { 141 | "type": "string" 142 | }, 143 | "billing_code": { 144 | "type": "string" 145 | }, 146 | "description": { 147 | "type": "string" 148 | } 149 | }, 150 | "required": [ 151 | "billing_code_type", 152 | 
"billing_code_type_version", 153 | "billing_code", 154 | "description" 155 | ] 156 | }, 157 | "provider_references": { 158 | "type": "object", 159 | "properties": { 160 | "provider_group_id": { "type": "number" }, 161 | "provider_groups": { 162 | "type": "array", 163 | "items": { 164 | "$ref": "#/definitions/providers" 165 | }, 166 | "uniqueItems": true, 167 | "default": [] 168 | }, 169 | "location": { 170 | "type": "string", 171 | "format": "uri", 172 | "pattern": "^https://" 173 | } 174 | }, 175 | "anyOf": [ 176 | { "required": ["location"] }, 177 | { "required": ["provider_groups"] } 178 | ], 179 | "required": ["provider_group_id"] 180 | }, 181 | "providers": { 182 | "type": "object", 183 | "properties": { 184 | "npi": { 185 | "type": "array", 186 | "items": { 187 | "type": "number" 188 | }, 189 | "uniqueItems": true, 190 | "default": [] 191 | }, 192 | "tin": { 193 | "type": "object", 194 | "properties": { 195 | "type": { 196 | "type": "string", 197 | "enum": ["ein", "npi"] 198 | }, 199 | "value": { 200 | "type": "string" 201 | } 202 | }, 203 | "required": ["type", "value"] 204 | } 205 | }, 206 | "required": ["npi", "tin"] 207 | }, 208 | "negotiated_rates": { 209 | "type": "object", 210 | "properties": { 211 | "negotiated_prices": { 212 | "type": "array", 213 | "items": { 214 | "$ref": "#/definitions/negotiated_price" 215 | }, 216 | "uniqueItems": true, 217 | "default": [] 218 | }, 219 | "provider_groups": { 220 | "type": "array", 221 | "items": { 222 | "$ref": "#/definitions/providers" 223 | }, 224 | "uniqueItems": true, 225 | "default": [] 226 | }, 227 | "provider_references": { 228 | "type": "array", 229 | "items": { 230 | "$ref": "#/definitions/provider_references/properties/provider_group_id" 231 | }, 232 | "uniqueItems": true, 233 | "default": [] 234 | } 235 | }, 236 | "oneOf": [ 237 | { "required": ["provider_references"] }, 238 | { "required": ["provider_groups"] } 239 | ], 240 | "required": ["negotiated_prices"] 241 | }, 242 | "negotiated_price": { 243 
| "type": "object", 244 | "properties": { 245 | "service_code": { 246 | "type": "array", 247 | "items": { 248 | "type": "string", 249 | "enum": [ 250 | "01", 251 | "02", 252 | "03", 253 | "04", 254 | "05", 255 | "06", 256 | "07", 257 | "08", 258 | "09", 259 | "10", 260 | "11", 261 | "12", 262 | "13", 263 | "14", 264 | "15", 265 | "16", 266 | "17", 267 | "18", 268 | "19", 269 | "20", 270 | "21", 271 | "22", 272 | "23", 273 | "24", 274 | "25", 275 | "26", 276 | "27", 277 | "28", 278 | "29", 279 | "30", 280 | "31", 281 | "32", 282 | "33", 283 | "34", 284 | "35", 285 | "36", 286 | "37", 287 | "38", 288 | "39", 289 | "40", 290 | "41", 291 | "42", 292 | "43", 293 | "44", 294 | "45", 295 | "46", 296 | "47", 297 | "48", 298 | "49", 299 | "50", 300 | "51", 301 | "52", 302 | "53", 303 | "54", 304 | "55", 305 | "56", 306 | "57", 307 | "58", 308 | "59", 309 | "60", 310 | "61", 311 | "62", 312 | "63", 313 | "64", 314 | "65", 315 | "66", 316 | "67", 317 | "68", 318 | "69", 319 | "70", 320 | "71", 321 | "72", 322 | "73", 323 | "74", 324 | "75", 325 | "76", 326 | "77", 327 | "78", 328 | "79", 329 | "80", 330 | "81", 331 | "82", 332 | "83", 333 | "84", 334 | "85", 335 | "86", 336 | "87", 337 | "88", 338 | "89", 339 | "90", 340 | "91", 341 | "92", 342 | "93", 343 | "94", 344 | "95", 345 | "96", 346 | "97", 347 | "98", 348 | "99", 349 | "CSTM-00" 350 | ] 351 | }, 352 | "uniqueItems": true 353 | }, 354 | "billing_class": { 355 | "enum": ["professional", "institutional"] 356 | }, 357 | "negotiated_type": { 358 | "enum": [ 359 | "negotiated", 360 | "derived", 361 | "fee schedule", 362 | "percentage", 363 | "per diem" 364 | ] 365 | }, 366 | "billing_code_modifier": { 367 | "type": "array", 368 | "items": { 369 | "type": "string" 370 | }, 371 | "uniqueItems": true, 372 | "default": [] 373 | }, 374 | "negotiated_rate": { 375 | "type": "number" 376 | }, 377 | "expiration_date": { 378 | "type": "string", 379 | "description": "This is a date format of YYYY-MM-DD" 380 | }, 381 | 
"additional_information": { 382 | "type": "string", 383 | "description": "In situations in which alternative reimbursement arrangements can neither be expressed as a dollar amount nor as a percentage, this open text field can be used to provide information such as, a description of the formula, variables, methodology or other information necessary to understand the arrangement. The open text field may be utilized for reporting only if a plan or issuer cannot disclose its in-network rates as a dollar amount or a percentage." 384 | } 385 | }, 386 | "required": [ 387 | "negotiated_type", 388 | "billing_class", 389 | "negotiated_rate", 390 | "expiration_date" 391 | ], 392 | "if": { 393 | "properties": { 394 | "billing_class": { "const": "professional" } 395 | } 396 | }, 397 | "then": { 398 | "required": ["service_code"] 399 | } 400 | } 401 | }, 402 | "type": "object", 403 | "properties": { 404 | "reporting_entity_name": { 405 | "type": "string" 406 | }, 407 | "reporting_entity_type": { 408 | "type": "string" 409 | }, 410 | "plan_name": { 411 | "type": "string" 412 | }, 413 | "plan_id_type": { 414 | "type": "string" 415 | }, 416 | "plan_id": { 417 | "type": "string" 418 | }, 419 | "plan_market_type": { 420 | "enum": ["group", "individual"] 421 | }, 422 | "last_updated_on": { 423 | "type": "string", 424 | "description": "This is a date format of YYYY-MM-DD" 425 | }, 426 | "version": { 427 | "type": "string" 428 | }, 429 | "provider_references": { 430 | "type": "array", 431 | "items": { 432 | "$ref": "#/definitions/provider_references" 433 | }, 434 | "default": [] 435 | }, 436 | "in_network": { 437 | "type": "array", 438 | "items": { 439 | "$ref": "#/definitions/in_network" 440 | }, 441 | "default": [] 442 | } 443 | }, 444 | "required": [ 445 | "reporting_entity_name", 446 | "reporting_entity_type", 447 | "last_updated_on", 448 | "in_network", 449 | "version" 450 | ], 451 | "dependentRequired": { 452 | "plan_name": ["plan_id_type", "plan_id", "plan_market_type"], 453 | 
"plan_id_type": ["plan_name", "plan_id", "plan_market_type"], 454 | "plan_id": ["plan_name", "plan_id_type", "plan_market_type"], 455 | "plan_market_type": ["plan_name", "plan_id_type", "plan_id"] 456 | } 457 | } 458 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package main 17 | 18 | import "github.com/danielchalef/mrfparse/cmd" 19 | 20 | func main() { 21 | cmd.Execute() 22 | } 23 | -------------------------------------------------------------------------------- /pkg/mrfparse/cloud/blob.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package cloud 17 | 18 | import ( 19 | "context" 20 | "errors" 21 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 22 | "io" 23 | "net/url" 24 | "os" 25 | "path/filepath" 26 | "strings" 27 | 28 | "gocloud.dev/blob" 29 | _ "gocloud.dev/blob/fileblob" // required by CDK as blob driver 30 | _ "gocloud.dev/blob/gcsblob" // required by CDK as blob driver 31 | _ "gocloud.dev/blob/s3blob" // required by CDK as blob driver 32 | ) 33 | 34 | var log = utils.GetLogger() 35 | 36 | // OpenBucket opens a blob storage bucket at the URI. Context can be used to cancel any operations. 37 | // Google CDK is used to support both AWS S3 and Google Cloud Storage. Use the correct URI scheme to 38 | // specify the storage provider (gs:// or s3://). 39 | func OpenBucket(ctx context.Context, uri string) (*blob.Bucket, error) { 40 | var ( 41 | err error 42 | b *blob.Bucket 43 | ) 44 | 45 | b, err = blob.OpenBucket(ctx, uri) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | return b, nil 51 | } 52 | 53 | // NewWriter creates a new io.WriteCloser for the given URI. Context can be used to cancel any operations. 54 | // Google Cloud Storage, AWS S3, and local filesystem URIs are supported. Use the correct URI scheme for 55 | // the storage provider (gs://, s3://) or no scheme for local filesystem. 56 | func NewWriter(ctx context.Context, uri string) (io.WriteCloser, error) { 57 | const ( 58 | flags = os.O_CREATE | os.O_WRONLY 59 | perms = 0o644 60 | ) 61 | 62 | var ( 63 | err error 64 | k string 65 | ) 66 | 67 | if !IsCloudURI(uri) { 68 | return os.OpenFile(uri, flags, perms) 69 | } 70 | 71 | _, _, k, err = ParseBlobURI(uri) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | b, err := OpenBucket(ctx, uri) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | return b.NewWriter(ctx, k, nil) 82 | } 83 | 84 | // NewReader creates a new io.ReadCloser for the given URI. Context can be used to cancel any operations. 
85 | // Google Cloud Storage, AWS S3, and local filesystem URIs are supported. Use the correct URI scheme for 86 | // the storage provider (gs://, s3://) or no scheme for local filesystem. 87 | // The URI must be a file, not a directory. 88 | func NewReader(ctx context.Context, uri string) (io.ReadCloser, error) { 89 | var ( 90 | err error 91 | k string 92 | ) 93 | 94 | if !IsCloudURI(uri) { 95 | return os.Open(uri) 96 | } 97 | 98 | _, _, k, err = ParseBlobURI(uri) 99 | if err != nil { 100 | return nil, err 101 | } 102 | 103 | b, err := OpenBucket(ctx, uri) 104 | if err != nil { 105 | return nil, err 106 | } 107 | 108 | return b.NewReader(ctx, k, nil) 109 | } 110 | 111 | // JoinURI joins two URI parts together, removing any trailing slashes from the left part and any 112 | // leading slashes from the right part. 113 | func JoinURI(left, right string) string { 114 | return strings.TrimRight(left, "/") + "/" + strings.TrimLeft(right, "/") 115 | } 116 | 117 | // Glob enumerates cloud storage objects/file names at a URI and returns a list of objects/ filename URIs that match the given pattern. 118 | // Context can be used to cancel any cloud operations. 119 | // Google Cloud Storage, AWS S3, and local filesystem URIs are supported. Use the correct URI scheme for 120 | // the storage provider (gs://, s3://) or no scheme for local filesystem. 121 | // The pattern is a glob pattern, not a regular expression. 
122 | func Glob(ctx context.Context, uri, pattern string) ([]string, error) { 123 | var ( 124 | matches []string 125 | err error 126 | ) 127 | 128 | // The path is a local filesystem path 129 | if !IsCloudURI(uri) { 130 | matches, err = filepath.Glob(filepath.Join(uri, pattern)) 131 | if err != nil { 132 | return nil, err 133 | } 134 | 135 | return matches, nil 136 | } 137 | 138 | u, err := url.Parse(uri) 139 | if err != nil { 140 | return nil, err 141 | } 142 | 143 | b, err := OpenBucket(context.Background(), uri) 144 | if err != nil { 145 | return nil, err 146 | } 147 | 148 | iter := b.List(&blob.ListOptions{Prefix: u.Path}) 149 | 150 | for { 151 | obj, err := iter.Next(ctx) 152 | if errors.Is(err, io.EOF) { 153 | break 154 | } 155 | 156 | if err != nil { 157 | return nil, err 158 | } 159 | 160 | k := filepath.Base(obj.Key) 161 | log.Debugf("Key is %s", k) 162 | 163 | if matched, _ := filepath.Match(pattern, k); matched { 164 | matches = append(matches, u.Scheme+u.Host+"/"+obj.Key) 165 | } 166 | } 167 | log.Debugf("Found %d matches for %s", len(matches), pattern) 168 | 169 | return matches, nil 170 | } 171 | 172 | // ParseBlobURI parses a URI into its scheme, bucket, and key components. 173 | func ParseBlobURI(uri string) (scheme, bucket, key string, err error) { 174 | u, err := url.Parse(uri) 175 | if err != nil { 176 | return "", "", "", err 177 | } 178 | 179 | return u.Scheme, u.Host, strings.TrimLeft(u.Path, "/"), nil 180 | } 181 | 182 | // IsCloudURI returns true if the URI is a cloud storage URI (gs:// or s3://). 183 | // It does so by attempting to parse the URI and checking if the scheme is non-empty. 
// A one-letter scheme is treated as a Windows drive letter (for example `C:\data\file.json`)
// rather than a storage scheme, so such paths are handled as local filesystem paths.
// URIs that cannot be parsed are reported as not being cloud URIs.
func IsCloudURI(uri string) bool {
	u, err := url.Parse(uri)
	if err != nil {
		return false
	}

	// url.Parse reads the "C" of "C:\path" as a scheme; require at least two characters.
	return len(u.Scheme) > 1
}
30 | */ 31 | package cloud 32 | 33 | import ( 34 | "testing" 35 | 36 | "github.com/stretchr/testify/require" 37 | ) 38 | 39 | func TestIsCloudURITrue(t *testing.T) { 40 | var uri = "gs://bucket/path/to/file.json" 41 | var expected = true 42 | var actual = IsCloudURI(uri) 43 | 44 | require.Equal(t, expected, actual) 45 | } 46 | 47 | func TestIsCloudURIFalse(t *testing.T) { 48 | var uri = "/path/to/file.json" 49 | var expected = false 50 | var actual = IsCloudURI(uri) 51 | 52 | require.Equal(t, expected, actual) 53 | } 54 | 55 | func TestParseBlobUriCloud(t *testing.T) { 56 | var uri = "gs://bucket/path/to/file.json" 57 | var exoectedScheme = "gs" 58 | var expectedBucket = "bucket" 59 | var expectedKey = "path/to/file.json" 60 | var actualScheme, actualBucket, actualKey, err = ParseBlobURI(uri) 61 | 62 | require.Nil(t, err) 63 | require.Equal(t, exoectedScheme, actualScheme) 64 | require.Equal(t, expectedBucket, actualBucket) 65 | require.Equal(t, expectedKey, actualKey) 66 | } 67 | 68 | func TestParseBlobUriFS(t *testing.T) { 69 | var uri = "/path/to/file.json" 70 | var exoectedScheme = "" 71 | var expectedBucket = "" 72 | var expectedKey = "path/to/file.json" 73 | var actualScheme, actualBucket, actualKey, err = ParseBlobURI(uri) 74 | 75 | require.Nil(t, err) 76 | require.Equal(t, exoectedScheme, actualScheme) 77 | require.Equal(t, expectedBucket, actualBucket) 78 | require.Equal(t, expectedKey, actualKey) 79 | } 80 | -------------------------------------------------------------------------------- /pkg/mrfparse/http/http.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package http 17 | 18 | import ( 19 | "errors" 20 | "fmt" 21 | "io" 22 | "net/http" 23 | 24 | "github.com/avast/retry-go/v4" 25 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 26 | "github.com/spf13/viper" 27 | 28 | "time" 29 | ) 30 | 31 | const MaxRetryAttempts = 10 32 | 33 | var log = utils.GetLogger() 34 | 35 | // DownloadReader downloads a file from the given URL and returns an io.ReadCloser. 36 | // The caller is responsible for closing the returned io.ReadCloser. 37 | // DownloadReader retries failed requests, waiting between attempts as computed by RetryAfterDelay.
38 | func DownloadReader(fileURL string) (io.ReadCloser, error) { 39 | var ( 40 | err error 41 | r *http.Response 42 | HTTPTimeOut time.Duration 43 | ) 44 | 45 | HTTPTimeOut = time.Duration(viper.GetInt("pipeline.download_timeout")) * time.Minute 46 | 47 | var httpClient = &http.Client{ 48 | Timeout: HTTPTimeOut, 49 | } 50 | 51 | err = retry.Do(func() error { 52 | r, err = httpClient.Get(fileURL) //nolint:bodyclose // Embedded in retry confusing linter 53 | if err != nil { 54 | return err 55 | } 56 | return nil 57 | }, retry.DelayType(RetryAfterDelay), 58 | retry.Attempts(MaxRetryAttempts), 59 | ) 60 | if err != nil { 61 | r.Body.Close() 62 | 63 | return nil, fmt.Errorf("unable to download file from %s: %s", fileURL, errors.Unwrap(err)) 64 | } 65 | 66 | if r.StatusCode != http.StatusOK { 67 | errorText := fmt.Errorf("bad status downloading %s: %s", fileURL, r.Status) 68 | log.Error(errorText) 69 | 70 | return nil, errorText 71 | } 72 | 73 | return r.Body, nil 74 | } 75 | -------------------------------------------------------------------------------- /pkg/mrfparse/http/http_retry_after.go: -------------------------------------------------------------------------------- 1 | /* 2 | MIT License 3 | 4 | # Copyright (c) 2020 aereal 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package http 25 | 26 | import ( 27 | "errors" 28 | "fmt" 29 | "net/http" 30 | "strconv" 31 | "time" 32 | 33 | "github.com/avast/retry-go/v4" 34 | ) 35 | 36 | var ( 37 | // ErrNegativeSecondsNotAllowed is parsing error that represents seconds value is negative. 38 | // The seconds value in Retry-After must be positive. 39 | ErrNegativeSecondsNotAllowed = errors.New("negative seconds not allowed") 40 | 41 | // ErrInvalidFormat is parsing error that represents given Retry-After neither valid seconds nor valid HTTP date. 
42 | ErrInvalidFormat = errors.New("Retry-After value must be seconds integer or HTTP date string") 43 | ) 44 | 45 | func RetryAfterDelay(n uint, err error, config *retry.Config) time.Duration { 46 | var ( 47 | t time.Time 48 | 49 | e = new(RetryAfterError) 50 | ) 51 | 52 | if errors.As(err, e) { 53 | if t, err = ParseRetryAfter(e.response.Header.Get("Retry-After")); err == nil { 54 | log.Warnf("Got Retry-After header: %s", t) 55 | return time.Until(t) 56 | } 57 | } 58 | 59 | delay := retry.BackOffDelay(n, err, config) 60 | 61 | if n > MaxRetryAttempts/2 { 62 | log.Warnf("Retrying in %s after error %s", delay, err) 63 | } 64 | 65 | return delay 66 | } 67 | 68 | type RetryAfterError struct { 69 | response http.Response 70 | } 71 | 72 | func (err RetryAfterError) Error() string { 73 | return fmt.Sprintf( 74 | "Request to %s fail %s (%d)", 75 | err.response.Request.RequestURI, 76 | err.response.Status, 77 | err.response.StatusCode, 78 | ) 79 | } 80 | 81 | // ParseRetryAfter tries to parse the value as seconds or HTTP date. 82 | func ParseRetryAfter(retryAfter string) (time.Time, error) { 83 | if dur, err := ParseSeconds(retryAfter); err == nil { 84 | now := time.Now() 85 | return now.Add(dur), nil 86 | } 87 | 88 | if dt, err := ParseHTTPDate(retryAfter); err == nil { 89 | return dt, nil 90 | } 91 | 92 | return time.Time{}, ErrInvalidFormat 93 | } 94 | 95 | // ParseSeconds parses the value as seconds. 96 | func ParseSeconds(retryAfter string) (time.Duration, error) { 97 | seconds, err := strconv.ParseInt(retryAfter, 10, 64) 98 | 99 | if err != nil { 100 | return time.Duration(0), err 101 | } 102 | 103 | if seconds < 0 { 104 | return time.Duration(0), ErrNegativeSecondsNotAllowed 105 | } 106 | 107 | return time.Second * time.Duration(seconds), nil 108 | } 109 | 110 | // ParseHTTPDate parses the value as HTTP date. 
111 | func ParseHTTPDate(retryAfter string) (time.Time, error) { 112 | parsed, err := time.Parse(time.RFC1123, retryAfter) 113 | if err != nil { 114 | return time.Time{}, err 115 | } 116 | 117 | return parsed, nil 118 | } 119 | -------------------------------------------------------------------------------- /pkg/mrfparse/http/http_retry_after_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | MIT License 3 | 4 | # Copyright (c) 2020 aereal 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package http 25 | 26 | import ( 27 | "reflect" 28 | "testing" 29 | "time" 30 | ) 31 | 32 | func TestParseSeconds(t *testing.T) { 33 | cases := []struct { 34 | name string 35 | args string 36 | want time.Duration 37 | wantErr bool 38 | }{ 39 | {"ok", "60", time.Minute * 1, false}, 40 | {"invalid", "", time.Duration(0), true}, 41 | {"negative", "-10", time.Duration(0), true}, 42 | } 43 | for _, c := range cases { 44 | t.Run(c.name, func(t *testing.T) { 45 | got, err := ParseSeconds(c.args) 46 | if (err != nil) != c.wantErr { 47 | t.Errorf("error = %v, wantErr %v", err, c.wantErr) 48 | return 49 | } 50 | if !reflect.DeepEqual(got, c.want) { 51 | t.Errorf("got = %v, want = %v", got, c.want) 52 | } 53 | }) 54 | } 55 | } 56 | 57 | func TestParseHTTPDate(t *testing.T) { 58 | now := time.Now() 59 | // orig := nowFunc 60 | // nowFunc = func() time.Time { return now } 61 | // defer func() { 62 | // nowFunc = orig 63 | // }() 64 | aMinuteLater := now.Add(time.Minute) 65 | 66 | cases := []struct { 67 | name string 68 | args string 69 | want time.Time 70 | wantErr bool 71 | }{ 72 | {"ok", aMinuteLater.Format(time.RFC1123), aMinuteLater, false}, 73 | {"invalid format", "2020-01-02", time.Time{}, true}, 74 | } 75 | for _, c := range cases { 76 | t.Run(c.name, func(t *testing.T) { 77 | got, err := ParseHTTPDate(c.args) 78 | if (err != nil) != c.wantErr { 79 | t.Errorf("error = %v, wantErr %v", err, c.wantErr) 80 | return 81 | } 82 | if got.Unix() != c.want.Unix() { 83 | t.Errorf("got = %s, want %s", got, c.want) 84 | } 85 | }) 86 | } 87 | } 88 | 89 | func TestParse(t *testing.T) { 90 | now := time.Now() 91 | aMinuteLater := now.Add(time.Minute) 92 | 93 | cases := []struct { 94 | name string 95 | args string 96 | want time.Time 97 | wantErr bool 98 | }{ 99 | {"seconds/ok", "60", aMinuteLater, false}, 100 | {"http date/ok", aMinuteLater.Format(time.RFC1123), aMinuteLater, false}, 101 | {"invalid", "", time.Time{}, true}, 102 | } 103 | for _, c := range cases { 104 | 
t.Run(c.name, func(t *testing.T) { 105 | got, err := ParseRetryAfter(c.args) 106 | if (err != nil) != c.wantErr { 107 | t.Errorf("error = %v, wantErr %v", err, c.wantErr) 108 | return 109 | } 110 | if got.Unix() != c.want.Unix() { 111 | t.Errorf("got = %s, want %s", got, c.want) 112 | } 113 | }) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /pkg/mrfparse/models/mrf.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package models 17 | 18 | type ServiceCodes []string 19 | type BillingCodeModifiers []string 20 | type ProviderReferences []string 21 | type NpiList []int64 22 | 23 | // We use plain encoding for all fields to increase compatibility with parquetlibraries. 
// Mrf is the single flat record type: it embeds every record-specific struct below and adds the
// identifiers that link records together. Field and embedding order is left untouched because the
// parquet tags on these fields define the output columns.
type Mrf struct {
	MrfRoot
	InNetwork

	BundledCodes
	Tin
	ProviderGroup
	// UUID identifies this record; ParentUUID holds the UUID of the record it belongs to.
	UUID       string `parquet:"uuid,plain"`
	ParentUUID string `parquet:"parent_uuid,plain"`
	// RecordType names the kind of record, e.g. "bundled_codes" or "negotiated_rate".
	RecordType string `parquet:"record_type,enum,plain"`

	Provider

	NegotiatedRate

	NegotiatedPrices
}

// MrfRoot holds the file-level attributes of an MRF document. The plan_* fields are omitted from
// JSON output when empty.
type MrfRoot struct {
	ReportingEntityName string `json:"reporting_entity_name" parquet:"reporting_entity_name,plain"`
	ReportingEntityType string `json:"reporting_entity_type" parquet:"reporting_entity_type,plain"`
	LastUpdatedOn       string `json:"last_updated_on" parquet:"last_updated_on,plain"`
	Version             string `json:"version" parquet:"version,plain"`
	PlanMarketType      string `json:"plan_market_type,omitempty" parquet:"plan_market_type,enum,plain"`
	PlanName            string `json:"plan_name,omitempty" parquet:"plan_name,plain"`
	PlanIDType          string `json:"plan_id_type,omitempty" parquet:"plan_id_type,plain"`
	PlanID              string `json:"plan_id,omitempty" parquet:"plan_id,plain"`
}

// ProviderGroup holds the provider_group_id column.
type ProviderGroup struct {
	ProviderGroupID string `parquet:"provider_group_id,enum,plain"`
}

// Provider holds the provider_parent and provider_npi_list columns.
type Provider struct {
	Parent  string  `parquet:"provider_parent,plain"`
	NpiList NpiList `parquet:"provider_npi_list,list,plain"`
}

// Tin holds the provider_tin_value and provider_tin_type columns.
type Tin struct {
	Value   string `parquet:"provider_tin_value,plain"`
	TinType string `parquet:"provider_tin_type,enum,plain"`
}

// InNetwork holds the attributes of an in_network item; its columns are prefixed with "in_".
type InNetwork struct {
	Name                   string `parquet:"in_name,plain"`
	Description            string `parquet:"in_description,plain"`
	NegotiationArrangement string `parquet:"in_negotiation_arrangement,enum,plain"`
	BillingCodeType        string `parquet:"in_billing_code_type,enum,plain"`
	BillingCode            string `parquet:"in_billing_code,plain"`
	BillingCodeTypeVersion string `parquet:"in_billing_code_type_version,plain"`
}

// BundledCodes holds the attributes of a bundled_codes item; its columns are prefixed with "in_bc_".
type BundledCodes struct {
	BCDescription     string `parquet:"in_bc_description,plain"`
	BCBillingCodeType string `parquet:"in_bc_billing_code_type,enum,plain"`
	BCBillingCode     string `parquet:"in_bc_billing_code,plain"`
	// NOTE(review): the "NC" prefix looks like a typo for "BC" (the column is
	// in_bc_billing_code_type_version) — confirm before renaming, as callers set this field by name.
	NCBillingCodeTypeVersion string `parquet:"in_bc_billing_code_type_version,plain"`
}

// NegotiatedPrices holds the attributes of a negotiated_prices item; its columns are prefixed
// with "in_np_".
type NegotiatedPrices struct {
	NegotiatedType        string               `parquet:"in_np_negotiated_type,enum,plain"`
	BillingClass          string               `parquet:"in_np_billing_class,plain"`
	ExpirationDate        string               `parquet:"in_np_expiration_date,plain"`
	AdditionalInformation string               `parquet:"in_np_additional_information,plain"`
	ServiceCodes          ServiceCodes         `parquet:"in_np_service_codes,list,plain"`
	BillingCodeModifiers  BillingCodeModifiers `parquet:"in_np_billing_code_modifiers,list,plain"`
	NegotiatedRateValue   float64              `parquet:"in_np_negotiated_rate,plain"`
}

// NegotiatedRate holds the provider references attached to a negotiated_rates item.
type NegotiatedRate struct {
	PRList ProviderReferences `parquet:"in_nr_provider_references,list,plain"`
}
// NotInListError reports that an item was looked up in a list that does not contain it.
type NotInListError struct {
	item string // the value that was not found
}

// Error implements the error interface, naming the item that is missing from the list.
func (e *NotInListError) Error() string {
	const format = "%s is not in list"

	return fmt.Sprintf(format, e.item)
}
15 | */ 16 | package mrf 17 | 18 | import ( 19 | "bufio" 20 | "context" 21 | "fmt" 22 | "io" 23 | "strings" 24 | 25 | "github.com/danielchalef/mrfparse/pkg/mrfparse/cloud" 26 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 27 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 28 | 29 | "github.com/minio/simdjson-go" 30 | ) 31 | 32 | func parseInNetworkRates(filename, rootUUUID string, serviceList StringSet) { 33 | const LinesAtATime int = 100 34 | 35 | var line string 36 | var lineCount = 0 37 | var totalLineCount = 0 38 | 39 | var strBuilder strings.Builder 40 | 41 | log.Info("Parsing in_network_rates: ", filename) 42 | 43 | f, err := cloud.NewReader(context.TODO(), filename) 44 | utils.ExitOnError(err) 45 | 46 | defer func(f io.ReadCloser) { 47 | err := f.Close() 48 | if err != nil { 49 | utils.ExitOnError(err) 50 | } 51 | }(f) 52 | 53 | scanner := bufio.NewScanner(f) 54 | 55 | buf := make([]byte, LineBuffer) 56 | scanner.Buffer(buf, MaxLineBuffer) 57 | 58 | for scanner.Scan() { 59 | line = scanner.Text() 60 | strBuilder.WriteString(line) 61 | strBuilder.WriteString("\n") 62 | 63 | if lineCount == LinesAtATime { 64 | lines := strBuilder.String() 65 | 66 | inPoolGroup.Submit(func() { 67 | parseInLines(&lines, rootUUUID, serviceList) 68 | }) 69 | 70 | lineCount = 0 71 | 72 | strBuilder.Reset() 73 | } else { 74 | lineCount++ 75 | } 76 | 77 | if totalLineCount%200 == 0 { 78 | log.Debug("Read ", totalLineCount, " lines") 79 | } 80 | 81 | totalLineCount++ 82 | } 83 | 84 | if err := scanner.Err(); err != nil { 85 | utils.ExitOnError(err) 86 | } 87 | 88 | if lineCount > 0 { 89 | log.Debug("Read ", totalLineCount, " lines") 90 | lines := strBuilder.String() 91 | 92 | inPoolGroup.Submit(func() { 93 | parseInLines(&lines, rootUUUID, serviceList) 94 | }) 95 | } 96 | 97 | log.Info("Completed reading negotiated_rates: ", filename) 98 | } 99 | 100 | func parseInLines(lines *string, rootUUID string, serviceList StringSet) { 101 | parsed, err := 
utils.ParseJSON(lines, nil) 102 | utils.ExitOnError(err) 103 | 104 | var iter = parsed.Iter() 105 | var tmpIter *simdjson.Iter 106 | 107 | var mrfList []*models.Mrf 108 | 109 | for { 110 | typ := iter.Advance() 111 | 112 | if typ == simdjson.TypeRoot { 113 | _, tmpIter, err = iter.Root(nil) 114 | utils.ExitOnError(err) 115 | 116 | _, err := tmpIter.FindElement(nil, "covered_services") 117 | if err == nil { 118 | // This is a covered_services record. We don't yet support these. 119 | utils.ExitOnError(fmt.Errorf("covered_services records are not supported")) 120 | } 121 | 122 | // Parse in_network_rates object 123 | mrfList, err = parseInObject(tmpIter, rootUUID, serviceList) 124 | // if we get a NotInListError, skip this record as it's not in the serviceList 125 | if e, ok := err.(*NotInListError); ok { 126 | log.Tracef("Skipping in_network_rates record. %s", e.Error()) 127 | continue 128 | } 129 | 130 | // if it's another error, exit 131 | utils.ExitOnError(err) 132 | 133 | err = WriteRecords(mrfList) 134 | utils.ExitOnError(err) 135 | } else if typ == simdjson.TypeNone { 136 | break 137 | } 138 | } 139 | } 140 | 141 | func parseInObject(iter *simdjson.Iter, rootUUID string, serviceList StringSet) ([]*models.Mrf, error) { 142 | var ( 143 | err error 144 | mrf *models.Mrf 145 | mrfList, mrfListTmp []*models.Mrf 146 | inUUID string 147 | ) 148 | 149 | // Parse the root of the in_network_rates record 150 | mrf, err = parseInRoot(iter, rootUUID, serviceList) 151 | if err != nil { 152 | return nil, err 153 | } 154 | 155 | inUUID = mrf.UUID 156 | 157 | mrfList = append(mrfList, mrf) 158 | 159 | // Parse bundled_codes, if present 160 | mrfListTmp, err = parseBundledCodes(iter, inUUID) 161 | if err != nil { 162 | return nil, err 163 | } 164 | log.Debug("Got bundled_codes: ", len(mrfListTmp), " records") 165 | 166 | mrfList = append(mrfList, mrfListTmp...) 
167 | 168 | // Parse negotiated_rates 169 | mrfListTmp, err = parseNegotiatedRates(iter, inUUID) 170 | if err != nil { 171 | return nil, err 172 | } 173 | 174 | log.Debug("Got negotiated_rates: ", len(mrfListTmp), " records") 175 | 176 | mrfList = append(mrfList, mrfListTmp...) 177 | 178 | return mrfList, nil 179 | } 180 | 181 | func parseBundledCodes(iter *simdjson.Iter, inUUID string) ([]*models.Mrf, error) { 182 | var mrfList []*models.Mrf 183 | 184 | path := "bundled_codes" 185 | bc, err := iter.FindElement(nil, path) 186 | 187 | // If bundled_codes is not present, return an empty list 188 | // bundled_codes is optional 189 | if utils.TestElementNotPresent(err, path) { 190 | return mrfList, nil 191 | } 192 | 193 | bcIter := bc.Iter 194 | 195 | for { 196 | typ := bcIter.Advance() 197 | 198 | if typ == simdjson.TypeObject { 199 | bcUUID := utils.GetUniqueID() 200 | 201 | bcType, err := utils.GetElementValue[string]("billing_code_type", &bcIter) 202 | if err != nil { 203 | return nil, err 204 | } 205 | 206 | bcCode, err := utils.GetElementValue[string]("billing_code", &bcIter) 207 | if err != nil { 208 | return nil, err 209 | } 210 | 211 | bcTypeVersion, err := utils.GetElementValue[string]("billing_code_type_version", &bcIter) 212 | if err != nil { 213 | return nil, err 214 | } 215 | 216 | path = "description" 217 | bcDescription, err := utils.GetElementValue[string]("description", &bcIter) 218 | 219 | r := utils.TestElementNotPresent(err, path) 220 | if r { 221 | bcDescription = "" 222 | } 223 | 224 | mrfList = append(mrfList, 225 | &models.Mrf{UUID: bcUUID, ParentUUID: inUUID, RecordType: "bundled_codes", 226 | BundledCodes: models.BundledCodes{BCBillingCodeType: bcType, BCBillingCode: bcCode, 227 | NCBillingCodeTypeVersion: bcTypeVersion, BCDescription: bcDescription}}) 228 | } else if typ == simdjson.TypeNone { 229 | break 230 | } 231 | } 232 | 233 | return mrfList, nil 234 | } 235 | 236 | func parseNegotiatedRates(iter *simdjson.Iter, inUUID string) 
([]*models.Mrf, error) { 237 | const prParent = "negotiated_rates" 238 | 239 | var ( 240 | err error 241 | mrfList, npMrfList, prMrfList []*models.Mrf 242 | nr *simdjson.Array 243 | neIter simdjson.Iter 244 | uuid string 245 | pr []string 246 | ) 247 | 248 | nr, err = utils.GetArrayForElement("negotiated_rates", iter) 249 | if err != nil { 250 | return nil, err 251 | } 252 | 253 | neIter = nr.Iter() 254 | 255 | for { 256 | typ := neIter.Advance() 257 | 258 | if typ == simdjson.TypeObject { 259 | uuid = utils.GetUniqueID() 260 | 261 | // Parse negotiated_prices 262 | npMrfList, err = parseNegotiatedPrices(&neIter, uuid) 263 | if err != nil { 264 | return nil, err 265 | } 266 | 267 | mrfList = append(mrfList, npMrfList...) 268 | 269 | // We should have one of provider_references or provider_groups 270 | pr, err = utils.GetArrayElementAsSlice[string]("provider_references", &neIter) 271 | // if provider_references is missing, parse provider_groups 272 | if utils.TestElementNotPresent(err, "provider_references") { 273 | log.Trace("provider_references not present, parsing provider_groups") 274 | // Add a record to capture the NR / parent relationship 275 | mrfList = append(mrfList, &models.Mrf{UUID: uuid, ParentUUID: inUUID}) 276 | 277 | prMrfList, err = parseProviderGroups(&neIter, uuid, prParent) 278 | if err != nil { 279 | return nil, err 280 | } 281 | 282 | mrfList = append(mrfList, prMrfList...) 283 | } else { 284 | // if provider_references not missing, add to providersFilter and write record 285 | providersFilter.Add(pr...) 286 | 287 | mrfList = append(mrfList, &models.Mrf{UUID: uuid, ParentUUID: inUUID, RecordType: "negotiated_rate", 288 | NegotiatedRate: models.NegotiatedRate{PRList: pr}}) 289 | } 290 | } else if typ == simdjson.TypeNone { 291 | break 292 | } 293 | } 294 | 295 | return mrfList, nil 296 | } 297 | 298 | // parseNegotiatedPrices parses the negotiated_prices array, and returns a list of MRFs. 
299 | func parseNegotiatedPrices(iter *simdjson.Iter, nrUUID string) ([]*models.Mrf, error) { 300 | var ( 301 | err error 302 | np *simdjson.Array 303 | mrfList []*models.Mrf 304 | scs []string 305 | bcs []string 306 | ai, t string 307 | uuid, path string 308 | ) 309 | 310 | np, err = utils.GetArrayForElement("negotiated_prices", iter) 311 | if err != nil { 312 | return nil, err 313 | } 314 | 315 | npIter := np.Iter() 316 | 317 | for { 318 | typ := npIter.Advance() 319 | if typ == simdjson.TypeObject { 320 | uuid = utils.GetUniqueID() 321 | 322 | t, err = utils.GetElementValue[string]("negotiated_type", &npIter) 323 | if err != nil { 324 | return nil, err 325 | } 326 | 327 | bc, err := utils.GetElementValue[string]("billing_class", &npIter) 328 | if err != nil { 329 | return nil, err 330 | } 331 | 332 | ed, err := utils.GetElementValue[string]("expiration_date", &npIter) 333 | if err != nil { 334 | return nil, err 335 | } 336 | 337 | nr, err := utils.GetElementValue[float64]("negotiated_rate", &npIter) 338 | if err != nil { 339 | return nil, err 340 | } 341 | 342 | path = "additional_information" 343 | ai, err = utils.GetElementValue[string](path, &npIter) 344 | if utils.TestElementNotPresent(err, path) { 345 | ai = "" 346 | } else if err != nil { 347 | return nil, err 348 | } 349 | 350 | scs, err = parseNPServiceCodes(&npIter, bc) 351 | if err != nil { 352 | return nil, err 353 | } 354 | 355 | path = "billing_code_modifier" 356 | bcs, err = utils.GetArrayElementAsSlice[string](path, &npIter) 357 | if utils.TestElementNotPresent(err, path) { 358 | bcs = []string{} 359 | } else if err != nil { 360 | return nil, err 361 | } 362 | 363 | mrfList = append(mrfList, &models.Mrf{UUID: uuid, ParentUUID: nrUUID, RecordType: "negotiated_prices", 364 | NegotiatedPrices: models.NegotiatedPrices{NegotiatedType: t, BillingClass: bc, ExpirationDate: ed, 365 | NegotiatedRateValue: nr, AdditionalInformation: ai, ServiceCodes: scs, 366 | BillingCodeModifiers: bcs}}) 367 | } else if 
typ == simdjson.TypeNone { 368 | break 369 | } 370 | } 371 | 372 | return mrfList, nil 373 | } 374 | 375 | // parseNPServiceCodes parses Negotiated Prices service_codes, which should be present if billing_class is professional 376 | // Returns an empty slice if billing_class is not professional 377 | func parseNPServiceCodes(iter *simdjson.Iter, billingClass string) ([]string, error) { 378 | var scs []string 379 | var err error 380 | 381 | scs, err = utils.GetArrayElementAsSlice[string]("service_code", iter) 382 | 383 | if billingClass == "professional" && utils.TestElementNotPresent(err, "service_code") { 384 | return nil, fmt.Errorf("service_code is missing from negotiated_prices for billing_class == professional") 385 | } else if err != nil && !utils.TestElementNotPresent(err, "service_code") { 386 | return nil, err 387 | } 388 | 389 | return scs, nil 390 | } 391 | 392 | // isServiceInList gets the billing_code_type and code and determines if the service is in serviceList 393 | func isServiceInList(tmpIter *simdjson.Iter, serviceList StringSet) (billingCodeType, billingCode string, ok bool) { 394 | bct, err := utils.GetElementValue[string]("billing_code_type", tmpIter) 395 | if err != nil { 396 | utils.ExitOnError(err) 397 | } 398 | 399 | bc, err := utils.GetElementValue[string]("billing_code", tmpIter) 400 | if err != nil { 401 | utils.ExitOnError(err) 402 | } 403 | 404 | return bct, bc, ((bct == "HCPCS" || bct == "CPT") && serviceList.Contains(bc)) 405 | } 406 | 407 | // parseInRoot parses the root of the in_network file, returning an Mrf record. 
408 | // If the service is not in the serviceList, it returns a NotInServiceListError 409 | func parseInRoot(iter *simdjson.Iter, rootUUID string, serviceList StringSet) (*models.Mrf, error) { 410 | var uuid = utils.GetUniqueID() 411 | 412 | // Get the billing_code_type and code and determine if in serviceList 413 | inBillingCodeType, inBillingCode, ok := isServiceInList(iter, serviceList) 414 | if !ok { 415 | // This is not a service we care about. Skip it. 416 | return nil, &NotInListError{inBillingCode} 417 | } 418 | 419 | log.Tracef("Found service %s %s", inBillingCodeType, inBillingCode) 420 | 421 | name, err := utils.GetElementValue[string]("name", iter) 422 | if err != nil { 423 | return nil, err 424 | } 425 | 426 | bcv, err := utils.GetElementValue[string]("billing_code_type_version", iter) 427 | if err != nil { 428 | return nil, err 429 | } 430 | 431 | na, err := utils.GetElementValue[string]("negotiation_arrangement", iter) 432 | if err != nil { 433 | return nil, err 434 | } 435 | 436 | path := "description" 437 | desc, err := utils.GetElementValue[string](path, iter) 438 | r := utils.TestElementNotPresent(err, path) 439 | // some carriers don't have a description, despite it being a required field 440 | if r { 441 | desc = "" 442 | } 443 | 444 | return &models.Mrf{UUID: uuid, ParentUUID: rootUUID, RecordType: "in_network", 445 | InNetwork: models.InNetwork{Name: name, BillingCodeTypeVersion: bcv, NegotiationArrangement: na, 446 | Description: desc, BillingCodeType: inBillingCodeType, BillingCode: inBillingCode}}, nil 447 | } 448 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/in_network_rates_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package mrf 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 22 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 23 | 24 | mapset "github.com/deckarep/golang-set/v2" 25 | "github.com/stretchr/testify/assert" 26 | ) 27 | 28 | func TestParseInObject(t *testing.T) { 29 | var j = []byte(`{ 30 | "negotiation_arrangement": "ffs", 31 | "name": "INJECTION, TRASTUZUMAB-QYYP, BIOSIMILAR, (TRAZIMERA), 10 MG", 32 | "billing_code_type": "HCPCS", 33 | "billing_code_type_version": "2022", 34 | "billing_code": "Q5116", 35 | "negotiated_rates": [ 36 | { 37 | "provider_references": [62.0004808658], 38 | "negotiated_prices": [ 39 | { 40 | "negotiated_type": "fee schedule", 41 | "negotiated_rate": 45.48, 42 | "expiration_date": "9999-12-31", 43 | "service_code": ["11", "22"], 44 | "billing_class": "professional" 45 | } 46 | ] 47 | }, 48 | { 49 | "provider_references": [62.0000565525], 50 | "negotiated_prices": [ 51 | { 52 | "negotiated_type": "fee schedule", 53 | "negotiated_rate": 69.14, 54 | "expiration_date": "9999-12-31", 55 | "service_code": ["22"], 56 | "billing_class": "institutional" 57 | } 58 | ] 59 | } 60 | ] 61 | }`) 62 | 63 | jp, err := utils.ParseJSON(&j, nil) 64 | assert.NoError(t, err) 65 | 66 | iter := jp.Iter() 67 | 68 | serviceList := mapset.NewSet("Q5116") 69 | 70 | mrfList, err := parseInObject(&iter, "rootUUID", serviceList) 71 | assert.NoError(t, err) 72 | 73 | mrf := mrfList[0] 74 | 75 | assert.Equal(t, "ffs", mrf.NegotiationArrangement) 76 | assert.Equal(t, "INJECTION, 
TRASTUZUMAB-QYYP, BIOSIMILAR, (TRAZIMERA), 10 MG", mrf.Name) 77 | assert.Equal(t, "HCPCS", mrf.BillingCodeType) 78 | assert.Equal(t, "2022", mrf.BillingCodeTypeVersion) 79 | assert.Equal(t, "Q5116", mrf.BillingCode) 80 | assert.Equal(t, "rootUUID", mrf.ParentUUID) 81 | 82 | prMrf := utils.Filter(mrfList, func(mrf *models.Mrf) bool { 83 | return mrf.RecordType == "negotiated_rate" 84 | }) 85 | 86 | assert.Equal(t, 2, len(prMrf)) 87 | assert.Equal(t, models.ProviderReferences{"62.0004808658"}, prMrf[0].PRList) 88 | assert.Equal(t, models.ProviderReferences{"62.0000565525"}, prMrf[1].PRList) 89 | 90 | npMrf := utils.Filter(mrfList, func(mrf *models.Mrf) bool { 91 | return mrf.RecordType == "negotiated_prices" 92 | }) 93 | 94 | assert.Equal(t, 2, len(npMrf)) 95 | assert.Equal(t, "fee schedule", npMrf[0].NegotiatedType) 96 | assert.Equal(t, 45.48, npMrf[0].NegotiatedRateValue) 97 | assert.Equal(t, "9999-12-31", npMrf[0].ExpirationDate) 98 | assert.Equal(t, models.ServiceCodes{"11", "22"}, npMrf[0].ServiceCodes) 99 | assert.Equal(t, "professional", npMrf[0].BillingClass) 100 | assert.Equal(t, "fee schedule", npMrf[1].NegotiatedType) 101 | assert.Equal(t, 69.14, npMrf[1].NegotiatedRateValue) 102 | assert.Equal(t, "9999-12-31", npMrf[1].ExpirationDate) 103 | assert.Equal(t, models.ServiceCodes{"22"}, npMrf[1].ServiceCodes) 104 | assert.Equal(t, "institutional", npMrf[1].BillingClass) 105 | assert.Equal(t, prMrf[0].UUID, npMrf[0].ParentUUID) 106 | assert.Equal(t, prMrf[1].UUID, npMrf[1].ParentUUID) 107 | 108 | } 109 | 110 | // test isServiceInList 111 | func TestIsServiceInListCPT(t *testing.T) { 112 | var j = []byte(`{ 113 | "negotiation_arrangement": "ffs", 114 | "name": "REV 204 & ICD10DX F12.10", 115 | "billing_code_type": "CPT", 116 | "billing_code_type_version": "2021", 117 | "billing_code": "2021", 118 | "description": "REV 204 & ICD10DX F12.10"}`) 119 | 120 | jp, err := utils.ParseJSON(&j, nil) 121 | assert.NoError(t, err) 122 | 123 | iter := jp.Iter() 124 | 125 | 
serviceList := mapset.NewSet("2025", "2021", "53") 126 | 127 | bt, bc, ok := isServiceInList(&iter, serviceList) 128 | assert.Equal(t, true, ok) 129 | assert.Equal(t, "CPT", bt) 130 | assert.Equal(t, "2021", bc) 131 | 132 | serviceList = mapset.NewSet("1", "2", "3") 133 | 134 | bt, bc, ok = isServiceInList(&iter, serviceList) 135 | assert.Equal(t, false, ok) 136 | assert.Equal(t, "CPT", bt) 137 | assert.Equal(t, "2021", bc) 138 | } 139 | 140 | func TestIsServiceInListHCPCS(t *testing.T) { 141 | var j = []byte(`{ 142 | "negotiation_arrangement": "ffs", 143 | "name": "REV 204 & ICD10DX F12.10", 144 | "billing_code_type": "HCPCS", 145 | "billing_code_type_version": "2021", 146 | "billing_code": "2021", 147 | "description": "REV 204 & ICD10DX F12.10"}`) 148 | 149 | jp, err := utils.ParseJSON(&j, nil) 150 | assert.NoError(t, err) 151 | 152 | iter := jp.Iter() 153 | 154 | serviceList := mapset.NewSet("2025", "2021", "53") 155 | 156 | bt, bc, ok := isServiceInList(&iter, serviceList) 157 | assert.Equal(t, true, ok) 158 | assert.Equal(t, "HCPCS", bt) 159 | assert.Equal(t, "2021", bc) 160 | 161 | serviceList = mapset.NewSet("1", "2", "3") 162 | 163 | bt, bc, ok = isServiceInList(&iter, serviceList) 164 | assert.Equal(t, false, ok) 165 | assert.Equal(t, "HCPCS", bt) 166 | assert.Equal(t, "2021", bc) 167 | } 168 | 169 | // Test parseInRoot 170 | func TestParseInRoot(t *testing.T) { 171 | var mrf *models.Mrf 172 | var j = []byte(`{ 173 | "negotiation_arrangement": "ffs", 174 | "name": "REV 204", 175 | "billing_code_type": "CPT", 176 | "billing_code_type_version": "1.0", 177 | "billing_code": "999", 178 | "description": "REV 204 & ICD10DX F12.10"}`) 179 | 180 | jp, err := utils.ParseJSON(&j, nil) 181 | assert.NoError(t, err) 182 | 183 | iter := jp.Iter() 184 | 185 | serviceList := mapset.NewSet("2025", "999", "53") 186 | 187 | rootUUID := "1234" 188 | 189 | mrf, err = parseInRoot(&iter, rootUUID, serviceList) 190 | 191 | assert.NoError(t, err) 192 | assert.Equal(t, "REV 204", 
mrf.Name) 193 | assert.Equal(t, "CPT", mrf.InNetwork.BillingCodeType) 194 | assert.Equal(t, "1.0", mrf.InNetwork.BillingCodeTypeVersion) 195 | assert.Equal(t, "999", mrf.InNetwork.BillingCode) 196 | assert.Equal(t, "REV 204 & ICD10DX F12.10", mrf.InNetwork.Description) 197 | assert.Equal(t, "ffs", mrf.NegotiationArrangement) 198 | assert.Equal(t, "1234", mrf.ParentUUID) 199 | assert.Equal(t, "in_network", mrf.RecordType) 200 | } 201 | 202 | func TestParseInRootNotInServiceListError(t *testing.T) { 203 | var j = []byte(`{ 204 | "negotiation_arrangement": "ffs", 205 | "name": "REV 204", 206 | "billing_code_type": "CPT", 207 | "billing_code_type_version": "1.0", 208 | "billing_code": "888", 209 | "description": "REV 204 & ICD10DX F12.10"}`) 210 | 211 | jp, err := utils.ParseJSON(&j, nil) 212 | assert.NoError(t, err) 213 | 214 | iter := jp.Iter() 215 | 216 | serviceList := mapset.NewSet("2025", "999", "53") 217 | 218 | rootUUID := "1234" 219 | 220 | _, err = parseInRoot(&iter, rootUUID, serviceList) 221 | 222 | _, ok := err.(*NotInListError) 223 | 224 | assert.Equal(t, true, ok) 225 | } 226 | 227 | func TestParseBundledCodes(t *testing.T) { 228 | var j = []byte(`{"bundled_codes":[ 229 | {"billing_code_type":"RC","billing_code_type_version":"2022","billing_code":"0636","description":"DRUGS REQUIRING DETAILED CODING"}, 230 | {"billing_code_type":"YC","billing_code_type_version":"2021","billing_code":"0450","description":"EMERGENCY ROOM GENERAL CLASSIFICATION"} 231 | ]}`) 232 | 233 | jp, err := utils.ParseJSON(&j, nil) 234 | assert.NoError(t, err) 235 | 236 | iter := jp.Iter() 237 | 238 | mrfList, err := parseBundledCodes(&iter, "987978hjh") 239 | assert.NoError(t, err) 240 | 241 | assert.Equal(t, 2, len(mrfList)) 242 | assert.Equal(t, "987978hjh", mrfList[0].ParentUUID) 243 | assert.Equal(t, "987978hjh", mrfList[1].ParentUUID) 244 | assert.Equal(t, "RC", mrfList[0].BundledCodes.BCBillingCodeType) 245 | assert.Equal(t, "YC", mrfList[1].BundledCodes.BCBillingCodeType) 246 
| assert.Equal(t, "2022", mrfList[0].BundledCodes.NCBillingCodeTypeVersion) 247 | assert.Equal(t, "2021", mrfList[1].BundledCodes.NCBillingCodeTypeVersion) 248 | assert.Equal(t, "0636", mrfList[0].BundledCodes.BCBillingCode) 249 | assert.Equal(t, "0450", mrfList[1].BundledCodes.BCBillingCode) 250 | assert.Equal(t, "DRUGS REQUIRING DETAILED CODING", mrfList[0].BundledCodes.BCDescription) 251 | assert.Equal(t, "EMERGENCY ROOM GENERAL CLASSIFICATION", mrfList[1].BundledCodes.BCDescription) 252 | } 253 | 254 | func TestParseBundledCodesMissing(t *testing.T) { 255 | var j = []byte(`{"field":"value"}`) 256 | 257 | jp, err := utils.ParseJSON(&j, nil) 258 | assert.NoError(t, err) 259 | 260 | iter := jp.Iter() 261 | 262 | mrfList, err := parseBundledCodes(&iter, "987978hjh") 263 | assert.NoError(t, err) 264 | 265 | assert.Equal(t, 0, len(mrfList)) 266 | } 267 | 268 | func TestParseNeServiceCodesProfessional(t *testing.T) { 269 | var j = []byte(`{"negotiated_type":"fee schedule","negotiated_rate":410.82,"expiration_date":"9999-12-31", 270 | "service_code":["23","41","26","21","52","42","24","22","56","31","51","53","61","19","34"],"billing_class":"professional"}`) 271 | 272 | var scsExpected = []string{"23", "41", "26", "21", "52", "42", "24", "22", "56", "31", "51", "53", "61", "19", "34"} 273 | 274 | jp, err := utils.ParseJSON(&j, nil) 275 | assert.NoError(t, err) 276 | 277 | iter := jp.Iter() 278 | 279 | scs, err := parseNPServiceCodes(&iter, "professional") 280 | assert.NoError(t, err) 281 | 282 | assert.Equal(t, 15, len(scs)) 283 | assert.Equal(t, scsExpected, scs) 284 | } 285 | 286 | func TestParseNeServiceCodesProfessionalMissing(t *testing.T) { 287 | var j = []byte(`{"negotiated_type":"fee schedule","negotiated_rate":410.82,"expiration_date":"9999-12-31", 288 | "billing_class":"professional"}`) 289 | 290 | jp, err := utils.ParseJSON(&j, nil) 291 | assert.NoError(t, err) 292 | 293 | iter := jp.Iter() 294 | 295 | _, err = parseNPServiceCodes(&iter, "professional") 296 
| assert.ErrorContains(t, err, "service_code is missing") 297 | } 298 | 299 | func TestParseNeServiceCodesInstitutional(t *testing.T) { 300 | var j = []byte(`{"negotiated_type":"fee schedule","negotiated_rate":410.82,"expiration_date":"9999-12-31", 301 | "billing_class":"institutional"}`) 302 | 303 | jp, err := utils.ParseJSON(&j, nil) 304 | assert.NoError(t, err) 305 | 306 | iter := jp.Iter() 307 | 308 | scs, err := parseNPServiceCodes(&iter, "institutional") 309 | assert.NoError(t, err) 310 | assert.Nil(t, scs) 311 | } 312 | 313 | func TestParseNegotiatedPrices(t *testing.T) { 314 | var j = []byte(`{ 315 | "provider_references": [62.0000005757], 316 | "negotiated_prices": [ 317 | { 318 | "negotiated_type": "fee schedule", 319 | "negotiated_rate": 34.54, 320 | "expiration_date": "9999-12-31", 321 | "service_code": ["81", "11", "22"], 322 | "billing_class": "professional" 323 | }, 324 | { 325 | "negotiated_type": "fee schedule", 326 | "negotiated_rate": 14.8, 327 | "expiration_date": "9999-12-31", 328 | "service_code": ["67", "55", "44"], 329 | "billing_code_modifier": ["12", "14"], 330 | "billing_class": "institutional" 331 | }]}`) 332 | 333 | jp, err := utils.ParseJSON(&j, nil) 334 | assert.NoError(t, err) 335 | 336 | iter := jp.Iter() 337 | 338 | mrf, err := parseNegotiatedPrices(&iter, "nrUUID") 339 | assert.NoError(t, err) 340 | 341 | assert.Equal(t, 2, len(mrf)) 342 | assert.Equal(t, "nrUUID", mrf[0].ParentUUID) 343 | assert.Equal(t, "nrUUID", mrf[1].ParentUUID) 344 | assert.Equal(t, "fee schedule", mrf[0].NegotiatedPrices.NegotiatedType) 345 | assert.Equal(t, "fee schedule", mrf[1].NegotiatedPrices.NegotiatedType) 346 | assert.Equal(t, 34.54, mrf[0].NegotiatedPrices.NegotiatedRateValue) 347 | assert.Equal(t, 14.8, mrf[1].NegotiatedPrices.NegotiatedRateValue) 348 | assert.Equal(t, "9999-12-31", mrf[0].NegotiatedPrices.ExpirationDate) 349 | assert.Equal(t, "9999-12-31", mrf[1].NegotiatedPrices.ExpirationDate) 350 | assert.Equal(t, "professional", 
mrf[0].NegotiatedPrices.BillingClass) 351 | assert.Equal(t, "institutional", mrf[1].NegotiatedPrices.BillingClass) 352 | assert.Equal(t, models.ServiceCodes{"81", "11", "22"}, mrf[0].NegotiatedPrices.ServiceCodes) 353 | assert.Equal(t, models.BillingCodeModifiers{"12", "14"}, mrf[1].NegotiatedPrices.BillingCodeModifiers) 354 | assert.Equal(t, "negotiated_prices", mrf[0].RecordType) 355 | assert.Equal(t, "negotiated_prices", mrf[1].RecordType) 356 | assert.NotZero(t, mrf[0].UUID) 357 | assert.NotZero(t, mrf[1].UUID) 358 | 359 | } 360 | 361 | // test parseNegotiatedRates function 362 | func TestParseNegotiatedRates(t *testing.T) { 363 | var j = []byte(`{ 364 | "negotiation_arrangement": "ffs", 365 | "negotiated_rates": [ 366 | { 367 | "provider_references": [492089], 368 | "negotiated_prices": [ 369 | { 370 | "negotiated_type": "per diem", 371 | "negotiated_rate": 781.0, 372 | "expiration_date": "9999-12-31", 373 | "service_code": [ 374 | "21", 375 | "31", 376 | "32", 377 | "33" 378 | ], 379 | "billing_class": "institutional" 380 | } 381 | ] 382 | }, 383 | { 384 | "provider_references": [11925, 403819], 385 | "negotiated_prices": [ 386 | { 387 | "negotiated_type": "per diem", 388 | "negotiated_rate": 4793.0, 389 | "expiration_date": "9999-12-31", 390 | "service_code": [ 391 | "21", 392 | "31", 393 | "32", 394 | "33", 395 | "34", 396 | "51", 397 | "54", 398 | "55", 399 | "56", 400 | "61" 401 | ], 402 | "billing_class": "institutional" 403 | } 404 | ] 405 | }, 406 | { 407 | "provider_references": [62.0004643342], 408 | "negotiated_prices": [ 409 | { 410 | "negotiated_type": "fee schedule", 411 | "negotiated_rate": 199.06, 412 | "expiration_date": "9999-12-31", 413 | "service_code": ["12", "11"], 414 | "billing_class": "professional", 415 | "billing_code_modifier": ["NU", ""] 416 | } 417 | ] 418 | } 419 | ] 420 | } 421 | `) 422 | 423 | jp, err := utils.ParseJSON(&j, nil) 424 | assert.NoError(t, err) 425 | 426 | iter := jp.Iter() 427 | 428 | mrf, err := 
parseNegotiatedRates(&iter, "inUUID") 429 | assert.NoError(t, err) 430 | 431 | assert.Equal(t, 6, len(mrf)) 432 | assert.Equal(t, "per diem", mrf[0].NegotiatedType) 433 | assert.Equal(t, 781.0, mrf[0].NegotiatedPrices.NegotiatedRateValue) 434 | assert.Equal(t, "9999-12-31", mrf[0].ExpirationDate) 435 | assert.Equal(t, "institutional", mrf[0].BillingClass) 436 | assert.Equal(t, models.ServiceCodes{"21", "31", "32", "33"}, mrf[0].ServiceCodes) 437 | assert.Equal(t, models.ProviderReferences{"492089"}, mrf[1].PRList) 438 | assert.Equal(t, "inUUID", mrf[1].ParentUUID) 439 | assert.Equal(t, models.ServiceCodes{"21", "31", "32", "33", "34", "51", "54", "55", "56", "61"}, mrf[2].ServiceCodes) 440 | assert.Equal(t, models.ProviderReferences{"11925", "403819"}, mrf[3].PRList) 441 | assert.Equal(t, "fee schedule", mrf[4].NegotiatedType) 442 | assert.Equal(t, 199.06, mrf[4].NegotiatedPrices.NegotiatedRateValue) 443 | assert.Equal(t, "9999-12-31", mrf[4].ExpirationDate) 444 | assert.Equal(t, "professional", mrf[4].BillingClass) 445 | assert.Equal(t, models.ServiceCodes{"12", "11"}, mrf[4].ServiceCodes) 446 | assert.Equal(t, models.BillingCodeModifiers{"NU", ""}, mrf[4].BillingCodeModifiers) 447 | // test for RecordType 448 | assert.Equal(t, "negotiated_rate", mrf[1].RecordType) 449 | assert.Equal(t, "negotiated_rate", mrf[3].RecordType) 450 | } 451 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/parse.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package mrf 17 | 18 | import ( 19 | "context" 20 | "os" 21 | "path/filepath" 22 | "strings" 23 | 24 | "github.com/danielchalef/mrfparse/pkg/mrfparse/cloud" 25 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 26 | "github.com/danielchalef/mrfparse/pkg/mrfparse/parquet" 27 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 28 | 29 | "github.com/alitto/pond" 30 | mapset "github.com/deckarep/golang-set/v2" 31 | ) 32 | 33 | const MaxWorkers int = 5 34 | const MaxCapacity int = 4 35 | 36 | const LineBuffer int = 5000000 // bytes 37 | const MaxLineBuffer int = LineBuffer * 5 // bytes 38 | 39 | type StringSet mapset.Set[string] 40 | 41 | var log = utils.GetLogger() 42 | 43 | var processPool = pond.New(MaxWorkers, MaxCapacity) 44 | var inPoolGroup = processPool.Group() 45 | var prPoolGroup = processPool.Group() 46 | var writerPoolGroup = processPool.Group() 47 | 48 | func Parse(inputPath, outputPath string, planID int64, serviceFile string) { 49 | const writerChannelSize int = 4 * 1024 50 | 51 | // used to persist []mrf to parquet 52 | wc := make(chan []*models.Mrf, writerChannelSize) 53 | // done channel for writers 54 | done := make(chan bool) 55 | 56 | // if outputPath is on the local filesystem and does not exist, create it 57 | if !cloud.IsCloudURI(outputPath) { 58 | // test if path already exists 59 | _, err := os.Stat(outputPath) 60 | if os.IsNotExist(err) { 61 | err = os.MkdirAll(outputPath, os.ModePerm) 62 | utils.ExitOnError(err) 63 | } else { 64 | utils.ExitOnError(err) 65 | } 66 | } 67 | 68 | // Start the writer in 
a goroutine 69 | writerPoolGroup.Submit(func() { parquet.Writer("mrf", outputPath, wc, done) }) 70 | 71 | // create the record writer using the new wc channel 72 | WriteRecords = NewRecordWriter(wc) 73 | 74 | // Load service list that we'll use to filter for services we care about 75 | serviceList := loadServiceList(serviceFile) 76 | log.Info("Loaded ", serviceList.Cardinality(), " services.") 77 | 78 | // Get list of files in inputPath. We expect to find a root file and in_network_rate and provider_references files 79 | filesList, err := cloud.Glob(context.TODO(), inputPath, "*.json*") 80 | utils.ExitOnError(err) 81 | 82 | log.Info("Found ", len(filesList), " files.") 83 | 84 | // Parse root file first as we need root uuid for the other records 85 | filename, err := findRootFile(filesList) 86 | utils.ExitOnError(err) 87 | 88 | rootUUID := writeRoot(filename, planID) 89 | log.Info("MrfRoot file parsed: ", filename) 90 | 91 | // Parse in_network files first 92 | for i := range filesList { 93 | f := filepath.Base(filesList[i]) 94 | if strings.HasPrefix(f, "in_network_") { 95 | log.Info("Found in_network_rate file", filename) 96 | parseInNetworkRates(filesList[i], rootUUID, serviceList) 97 | } 98 | } 99 | 100 | // Wait for all in_network threads to finish 101 | log.Debug("Waiting for in_network_rate threads to finish.") 102 | inPoolGroup.Wait() 103 | 104 | log.Info("Found ", providersFilter.Len(), " providers in in_network_rates.") 105 | 106 | // Parse provider_references_ files 107 | for i := range filesList { 108 | f := filepath.Base(filesList[i]) 109 | if strings.HasPrefix(f, "provider_references_") { 110 | log.Info("Found provider_references file", f) 111 | parseProviderReference(filesList[i], rootUUID) 112 | } 113 | } 114 | 115 | // Wait for all pr threads to finish 116 | prPoolGroup.Wait() 117 | // Tell writer to finish 118 | done <- true 119 | // Wait for Writers to clean up 120 | writerPoolGroup.Wait() 121 | log.Debugf("Finished waiting for writer pool group 
to finish.") 122 | // Stop the process pool 123 | processPool.StopAndWait() 124 | 125 | log.Info("Found ", totalProviderCounter.Load(), " providers. Matched on ", matchedProviderCounter.Load(), " providers.") 126 | } 127 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/provider_list.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package mrf 17 | 18 | import ( 19 | mapset "github.com/deckarep/golang-set/v2" 20 | ) 21 | 22 | var providersFilter = NewProviderList() 23 | 24 | type ProviderList struct { 25 | Providers StringSet 26 | } 27 | 28 | // NewProviderList returns a new ProviderList containing a StringSet 29 | func NewProviderList() *ProviderList { 30 | return &ProviderList{ 31 | Providers: mapset.NewSet[string](), 32 | } 33 | } 34 | 35 | // Add adds a string (or many strings) to the ProviderList 36 | // Returns true if all values were added, false otherwise 37 | func (p *ProviderList) Add(vals ...string) bool { 38 | var added = true 39 | 40 | for _, val := range vals { 41 | r := p.Providers.Add(val) 42 | added = added && r 43 | } 44 | 45 | return added 46 | } 47 | 48 | // Slice returns a slice of strings from the ProviderList 49 | func (p *ProviderList) Slice() []string { 50 | return p.Providers.ToSlice() 51 | } 52 | 53 | // Len returns the number of elements in the ProviderList 54 | func (p *ProviderList) Len() int { 55 | return p.Providers.Cardinality() 56 | } 57 | 58 | // Contains returns true if the ProviderList contains a string 59 | func (p *ProviderList) Contains(s string) bool { 60 | return p.Providers.Contains(s) 61 | } 62 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/provider_list_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package mrf 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | "golang.org/x/exp/slices" 23 | ) 24 | 25 | func TestProviderList(t *testing.T) { 26 | pl := NewProviderList() 27 | 28 | assert.Equal(t, 0, pl.Len()) 29 | assert.Equal(t, false, pl.Contains("a")) 30 | 31 | pl.Add("a", "b", "c") 32 | 33 | assert.Equal(t, 3, pl.Len()) 34 | assert.Equal(t, true, pl.Contains("a")) 35 | assert.Equal(t, true, pl.Contains("b")) 36 | assert.Equal(t, true, pl.Contains("c")) 37 | assert.Equal(t, false, pl.Contains("d")) 38 | 39 | pl.Add("a", "b", "c") 40 | 41 | assert.Equal(t, 3, pl.Len()) 42 | assert.Equal(t, true, pl.Contains("a")) 43 | assert.Equal(t, true, pl.Contains("b")) 44 | assert.Equal(t, true, pl.Contains("c")) 45 | assert.Equal(t, false, pl.Contains("d")) 46 | 47 | pl.Add("d") 48 | 49 | assert.Equal(t, 4, pl.Len()) 50 | assert.Equal(t, true, pl.Contains("a")) 51 | assert.Equal(t, true, pl.Contains("b")) 52 | assert.Equal(t, true, pl.Contains("c")) 53 | assert.Equal(t, true, pl.Contains("d")) 54 | 55 | } 56 | 57 | func TestProviderListReturn(t *testing.T) { 58 | pl := NewProviderList() 59 | 60 | pl.Add("a", "b", "c") 61 | 62 | r := pl.Add("a", "b", "c", "d") 63 | assert.Equal(t, false, r) 64 | 65 | r = pl.Add("e") 66 | assert.Equal(t, true, r) 67 | } 68 | 69 | func TestProviderListSlice(t *testing.T) { 70 | pl := NewProviderList() 71 | 72 | pl.Add("a", "b", "c") 73 | 74 | s := pl.Slice() 75 | 76 | assert.Equal(t, 3, len(s)) 77 | assert.Equal(t, true, slices.Contains(s, "a")) 78 | assert.Equal(t, true, slices.Contains(s, "b")) 79 | assert.Equal(t, true, slices.Contains(s, "c")) 80 | assert.Equal(t, false, slices.Contains(s, "d")) 81 | } 82 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/provider_references.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package mrf 17 | 18 | import ( 19 | "bufio" 20 | "context" 21 | "fmt" 22 | "io" 23 | "strings" 24 | "sync/atomic" 25 | 26 | "github.com/danielchalef/mrfparse/pkg/mrfparse/cloud" 27 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 28 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 29 | 30 | "github.com/minio/simdjson-go" 31 | ) 32 | 33 | var matchedProviderCounter = atomic.Int32{} 34 | var totalProviderCounter = atomic.Int32{} 35 | 36 | // parseProviderReference parses provider_references_*.jsonl files 37 | func parseProviderReference(filename, rootUUID string) { 38 | const LinesAtATime int = 2_000 39 | 40 | var ( 41 | line string 42 | lineCount = 0 43 | totalLineCount = 0 44 | strBuilder strings.Builder 45 | ) 46 | 47 | log.Info("Parsing provider references: ", filename) 48 | 49 | f, err := cloud.NewReader(context.TODO(), filename) 50 | utils.ExitOnError(err) 51 | 52 | defer func(f io.ReadCloser) { 53 | err := f.Close() 54 | if err != nil { 55 | utils.ExitOnError(err) 56 | } 57 | }(f) 58 | 59 | scanner := bufio.NewScanner(f) 60 | 61 | buf := make([]byte, LineBuffer) 62 | scanner.Buffer(buf, MaxLineBuffer) 63 | 64 | for scanner.Scan() { 65 | // Build a NDJSON string with LinesAtATime lines 66 | line = scanner.Text() 67 | strBuilder.WriteString(line) 68 | 
strBuilder.WriteString("\n") 69 | 70 | if lineCount == LinesAtATime { 71 | lines := strBuilder.String() 72 | 73 | // submit the parse job to the goroutine pool 74 | prPoolGroup.Submit(func() { 75 | parsePRLines(&lines, rootUUID) 76 | }) 77 | 78 | lineCount = 0 79 | 80 | strBuilder.Reset() 81 | } else { 82 | lineCount++ 83 | } 84 | 85 | if totalLineCount%100_000 == 0 { 86 | log.Debug("Read ", totalLineCount, " lines") 87 | } 88 | 89 | totalLineCount++ 90 | } 91 | 92 | if err := scanner.Err(); err != nil { 93 | utils.ExitOnError(err) 94 | } 95 | 96 | // Ensure we parse the last few lines if we've not yet reached LinesAtATime 97 | if lineCount > 0 { 98 | lines := strBuilder.String() 99 | 100 | prPoolGroup.Submit(func() { 101 | parsePRLines(&lines, rootUUID) 102 | }) 103 | } 104 | 105 | log.Info("Completed reading provider references: ", filename) 106 | } 107 | 108 | // parsePRLines parses provider_references lines, each of which is a json object. 109 | // It's designed to run concurrently, with parseProviderReference submitting parsePRLines jobs 110 | // to the goroutine pool. Parsed Mrf records are written to a channel for processing by a Writer thread. 111 | func parsePRLines(lines *string, rootUUID string) { 112 | parsed, err := utils.ParseJSON(lines, nil) 113 | utils.ExitOnError(err) 114 | 115 | var ( 116 | iter = parsed.Iter() 117 | tmpIter *simdjson.Iter 118 | mrfList []*models.Mrf 119 | ) 120 | 121 | for { 122 | typ := iter.Advance() 123 | 124 | if typ == simdjson.TypeRoot { 125 | totalProviderCounter.Add(1) 126 | 127 | _, tmpIter, err = iter.Root(nil) 128 | utils.ExitOnError(err) 129 | 130 | _, err := tmpIter.FindElement(nil, "location") 131 | if err == nil { 132 | // This is a location record. We don't yet support these. 
133 | utils.ExitOnError(fmt.Errorf("location records are not supported")) 134 | } 135 | 136 | mrfList, err = parsePRObject(tmpIter, providersFilter, rootUUID) 137 | // We only want to parse records where the provider_group_id is present in the in_network_rates dataset. 138 | // If we get a NotInListError, skip this record. 139 | if e, ok := err.(*NotInListError); ok { 140 | log.Tracef("Skipping provider_reference record. %s", e.Error()) 141 | continue 142 | } 143 | 144 | // Exit on any other error 145 | utils.ExitOnError(err) 146 | 147 | // Count a matched provider 148 | matchedProviderCounter.Add(1) 149 | 150 | err = WriteRecords(mrfList) 151 | utils.ExitOnError(err) 152 | } else if typ == simdjson.TypeNone { 153 | break 154 | } 155 | } 156 | } 157 | 158 | // parsePRObject parses a provider_reference object. It returns a slice of Mrf records, which 159 | // contains the root object and any provider_groups. 160 | func parsePRObject(iter *simdjson.Iter, providersFilter *ProviderList, rootUUID string) ([]*models.Mrf, error) { 161 | const parent = "provider_references" 162 | 163 | var ( 164 | mrf *models.Mrf 165 | mrfList []*models.Mrf 166 | err error 167 | ) 168 | 169 | mrf, err = parsePRRoot(providersFilter, rootUUID, iter) 170 | if err != nil { 171 | return nil, err 172 | } 173 | 174 | mrfList, err = parseProviderGroups(iter, mrf.UUID, parent) 175 | if err != nil { 176 | return nil, err 177 | } 178 | 179 | mrfList = append(mrfList, mrf) 180 | 181 | return mrfList, nil 182 | } 183 | 184 | // parsePRRoot parses the root of the provider_reference file. If the provider is not in the 185 | // providerFilter set, then it returns a NotInListError. 
186 | func parsePRRoot(providers *ProviderList, rootUUID string, iter *simdjson.Iter) (*models.Mrf, error) { 187 | uuid := utils.GetUniqueID() 188 | 189 | id, err := utils.GetElementValue[string]("provider_group_id", iter) 190 | if err != nil { 191 | return nil, err 192 | } 193 | 194 | if !providers.Contains(id) { 195 | return nil, &NotInListError{item: id} 196 | } 197 | 198 | return &models.Mrf{UUID: uuid, ParentUUID: rootUUID, RecordType: "provider_group", 199 | ProviderGroup: models.ProviderGroup{ProviderGroupID: id}}, nil 200 | } 201 | 202 | // parseProviderGroups parses the provider_groups element, which is an array of providers. 203 | // It creates an MRf record for each provider. 204 | func parseProviderGroups(iter *simdjson.Iter, parentUUID, parent string) ([]*models.Mrf, error) { 205 | var mrfList []*models.Mrf 206 | var mrf *models.Mrf 207 | var uuid string 208 | var err error 209 | var p *simdjson.Object 210 | var npi []int64 211 | 212 | pa, err := utils.GetArrayForElement("provider_groups", iter) 213 | if err != nil { 214 | return nil, err 215 | } 216 | 217 | paIter := pa.Iter() 218 | 219 | for { 220 | typ := paIter.Advance() 221 | 222 | if typ == simdjson.TypeObject { 223 | p, err = paIter.Object(p) 224 | if err != nil { 225 | return nil, err 226 | } 227 | 228 | uuid = utils.GetUniqueID() 229 | 230 | // Parse the npi array 231 | npi, err = utils.GetArrayElementAsSlice[int64]("npi", &paIter) 232 | if err != nil { 233 | return nil, err 234 | } 235 | 236 | mrfList = append(mrfList, &models.Mrf{UUID: uuid, ParentUUID: parentUUID, RecordType: "provider", 237 | Provider: models.Provider{Parent: parent, NpiList: npi}}) 238 | 239 | // parse tin element 240 | mrf, err = parseTin(&paIter, parentUUID) 241 | if err != nil { 242 | return nil, err 243 | } 244 | 245 | mrfList = append(mrfList, mrf) 246 | } else if typ == simdjson.TypeNone { 247 | break 248 | } 249 | } 250 | 251 | return mrfList, err 252 | } 253 | 254 | // parseTin parses the tin element of the 
provider record. 255 | // parentUUID is the UUID of the parent providers record. 256 | func parseTin(iter *simdjson.Iter, parentUUID string) (*models.Mrf, error) { 257 | tin, err := iter.FindElement(nil, "tin") 258 | if err != nil { 259 | return nil, err 260 | } 261 | 262 | tt, err := utils.GetElementValue[string]("type", &tin.Iter) 263 | if err != nil { 264 | return nil, err 265 | } 266 | 267 | tv, err := utils.GetElementValue[string]("value", &tin.Iter) 268 | if err != nil { 269 | return nil, err 270 | } 271 | 272 | tinUUID := utils.GetUniqueID() 273 | 274 | return &models.Mrf{UUID: tinUUID, ParentUUID: parentUUID, RecordType: "tin", 275 | Tin: models.Tin{TinType: tt, Value: tv}}, nil 276 | } 277 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/provider_references_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package mrf 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 20 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 21 | "testing" 22 | 23 | "github.com/stretchr/testify/assert" 24 | ) 25 | 26 | // test parsePRObject function 27 | func TestParsePRObject(t *testing.T) { 28 | var j = []byte(`{ 29 | "provider_group_id": 62.0003430048, 30 | "provider_groups": [ 31 | { "npi": [1821198789], "tin": { "type": "ein", "value": "1821198789" } }, 32 | { "npi": [1770512915], "tin": { "type": "npi", "value": "1770512915" } } 33 | ] 34 | }`) 35 | 36 | var providerList = NewProviderList() 37 | 38 | providerList.Add("62.0003430048", "2342423423") 39 | 40 | jp, err := utils.ParseJSON(&j, nil) 41 | assert.NoError(t, err) 42 | 43 | iter := jp.Iter() 44 | 45 | mrfList, err := parsePRObject(&iter, providerList, "1234-5678-9012-3456") 46 | assert.NoError(t, err) 47 | 48 | // 1 provider_group, 2 provider, 2 tin 49 | assert.Equal(t, 5, len(mrfList)) 50 | 51 | pgMrf := utils.Filter(mrfList, func(mrf *models.Mrf) bool { 52 | return mrf.RecordType == "provider_group" 53 | }) 54 | assert.Equal(t, 1, len(pgMrf)) 55 | assert.Equal(t, "1234-5678-9012-3456", pgMrf[0].ParentUUID) 56 | assert.Equal(t, "62.0003430048", pgMrf[0].ProviderGroupID) 57 | 58 | pgUUID := pgMrf[0].UUID 59 | 60 | tinMrf := utils.Filter(mrfList, func(mrf *models.Mrf) bool { 61 | return mrf.RecordType == "tin" 62 | }) 63 | assert.Equal(t, 2, len(tinMrf)) 64 | assert.Equal(t, "ein", tinMrf[0].TinType) 65 | assert.Equal(t, "1821198789", tinMrf[0].Value) 66 | assert.Equal(t, pgUUID, tinMrf[0].ParentUUID) 67 | assert.Equal(t, "npi", tinMrf[1].TinType) 68 | assert.Equal(t, "1770512915", tinMrf[1].Value) 69 | assert.Equal(t, pgUUID, tinMrf[1].ParentUUID) 70 | 71 | providerMrf := utils.Filter(mrfList, func(mrf *models.Mrf) bool { 72 | return mrf.RecordType == "provider" 73 | }) 74 | assert.Equal(t, 2, len(providerMrf)) 75 | assert.Equal(t, int64(1821198789), providerMrf[0].NpiList[0]) 76 
| assert.Equal(t, pgUUID, providerMrf[0].ParentUUID) 77 | assert.Equal(t, int64(1770512915), providerMrf[1].NpiList[0]) 78 | assert.Equal(t, pgUUID, providerMrf[1].ParentUUID) 79 | 80 | } 81 | 82 | // test parseTin 83 | func TestParseTin(t *testing.T) { 84 | var j = []byte(`{ "npi": [1821198789], "tin": { "type": "npi", "value": "1821198789" } }`) 85 | 86 | jp, err := utils.ParseJSON(&j, nil) 87 | assert.NoError(t, err) 88 | 89 | iter := jp.Iter() 90 | 91 | mrf, err := parseTin(&iter, "1234-5678-9012-3456") 92 | assert.NoError(t, err) 93 | 94 | assert.Equal(t, "1234-5678-9012-3456", mrf.ParentUUID) 95 | assert.Equal(t, "npi", mrf.Tin.TinType) 96 | assert.Equal(t, "1821198789", mrf.Tin.Value) 97 | assert.Equal(t, "tin", mrf.RecordType) 98 | } 99 | 100 | // test parsePRRoot 101 | func TestParsePRRootPgIdPresent(t *testing.T) { 102 | var j = []byte(`{ 103 | "provider_group_id": 62.0003430048, 104 | "provider_groups": [ 105 | { 106 | "npi": [1821198789, 987654321], 107 | "tin": { "type": "npi", "value": "1407989569" } 108 | }, 109 | { 110 | "npi": [1174123582], 111 | "tin": { "type": "ein", "value": "850645536" } 112 | } 113 | ] 114 | }`) 115 | 116 | var providerList = NewProviderList() 117 | 118 | providerList.Add("62.0003430048", "2342423423") 119 | 120 | jp, err := utils.ParseJSON(&j, nil) 121 | assert.NoError(t, err) 122 | 123 | iter := jp.Iter() 124 | 125 | mrfList, err := parsePRRoot(providerList, "1234-5678-9012-3456", &iter) 126 | assert.NoError(t, err) 127 | 128 | assert.Equal(t, "1234-5678-9012-3456", mrfList.ParentUUID) 129 | assert.Equal(t, "62.0003430048", mrfList.ProviderGroupID) 130 | assert.Equal(t, "provider_group", mrfList.RecordType) 131 | } 132 | 133 | func TestParsePRRootPgIdNotPresent(t *testing.T) { 134 | var j = []byte(`{ 135 | "provider_group_id": 62.0003430048, 136 | "provider_groups": [ 137 | { 138 | "npi": [1821198789, 987654321], 139 | "tin": { "type": "npi", "value": "1407989569" } 140 | }, 141 | { 142 | "npi": [1174123582], 143 | "tin": { 
"type": "ein", "value": "850645536" } 144 | } 145 | ] 146 | }`) 147 | 148 | var providerList = NewProviderList() 149 | 150 | providerList.Add("65.0003430048", "2342423423") 151 | 152 | jp, err := utils.ParseJSON(&j, nil) 153 | assert.NoError(t, err) 154 | 155 | iter := jp.Iter() 156 | 157 | mrfList, err := parsePRRoot(providerList, "1234-5678-9012-3456", &iter) 158 | if assert.Error(t, err) { 159 | assert.Equal(t, &NotInListError{"62.0003430048"}, err) 160 | } 161 | 162 | assert.Nil(t, mrfList) 163 | } 164 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/record_writer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package mrf 17 | 18 | import "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 19 | 20 | var WriteRecords func(records []*models.Mrf) error 21 | 22 | // NewRecordWriter returns a function that writes Mrf records to the writer channel 23 | // This allows us to avoid passing the channel to every function that needs to write 24 | func NewRecordWriter(wc chan []*models.Mrf) func(records []*models.Mrf) error { 25 | return func(records []*models.Mrf) error { 26 | wc <- records 27 | 28 | return nil 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package mrf 17 | 18 | import ( 19 | "context" 20 | "encoding/json" 21 | "errors" 22 | "github.com/danielchalef/mrfparse/pkg/mrfparse/cloud" 23 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 24 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 25 | "io" 26 | "path/filepath" 27 | "strconv" 28 | "strings" 29 | ) 30 | 31 | // parseMrfRoot parses the root json doc and returns a Mrf struct 32 | func parseMrfRoot(doc []byte, planID int64) (*models.Mrf, error) { 33 | var ( 34 | root models.MrfRoot 35 | mrf models.Mrf 36 | uuid = utils.GetUniqueID() 37 | ) 38 | 39 | err := json.Unmarshal(doc, &root) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | if planID != -1 { 45 | root.PlanID = strconv.FormatInt(planID, 10) 46 | } 47 | 48 | mrf = models.Mrf{UUID: uuid, RecordType: "root", MrfRoot: root} 49 | 50 | return &mrf, nil 51 | } 52 | 53 | // WriteRoot loads the root.json file and writes it 54 | func writeRoot(filename string, planID int64) string { 55 | f, err := cloud.NewReader(context.TODO(), filename) 56 | utils.ExitOnError(err) 57 | 58 | defer func(f io.ReadCloser) { 59 | err = f.Close() 60 | if err != nil { 61 | log.Errorf("Unable to close file: %s", err.Error()) 62 | } 63 | }(f) 64 | 65 | doc, err := io.ReadAll(f) 66 | utils.ExitOnError(err) 67 | 68 | mrf, err := parseMrfRoot(doc, planID) 69 | utils.ExitOnError(err) 70 | 71 | err = WriteRecords([]*models.Mrf{mrf}) 72 | utils.ExitOnError(err) 73 | 74 | return mrf.UUID 75 | } 76 | 77 | func findRootFile(filesList []string) (string, error) { 78 | for _, file := range filesList { 79 | filename := filepath.Base(file) 80 | if strings.Contains(filename, "root.json") { 81 | return file, nil 82 | } 83 | } 84 | 85 | return "", errors.New("root.json file not found") 86 | } 87 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/root_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 
2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package mrf 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | // test findRootFile function 25 | func TestFindRootFile(t *testing.T) { 26 | var filesList = []string{"s3://some_bucket/some/path/in_network_001.json", "s3://some_bucket/some/path/root.json", 27 | "s3://some_bucket/some/path/provider_references001.json", "s3://some_bucket/some/path/"} 28 | 29 | filename, err := findRootFile(filesList) 30 | assert.NoError(t, err) 31 | assert.Equal(t, "s3://some_bucket/some/path/root.json", filename) 32 | } 33 | 34 | // test parseMrfRoot function 35 | func TestParseMrfRoot(t *testing.T) { 36 | doc := []byte(`{"reporting_entity_name":"Aetna Health Insurance Company", 37 | "reporting_entity_type":"Health Insurance Issuer", 38 | "last_updated_on":"2022-11-05", 39 | "plan_market_type":"group", 40 | "plan_id":"1234-5678-9012-3456", 41 | "plan_id_type":"planidtype", 42 | "version":"1.3.1"}`) 43 | 44 | mrf, err := parseMrfRoot(doc, -1) 45 | assert.NoError(t, err) 46 | assert.Equal(t, "Aetna Health Insurance Company", mrf.ReportingEntityName) 47 | assert.Equal(t, "Health Insurance Issuer", mrf.ReportingEntityType) 48 | assert.Equal(t, "2022-11-05", mrf.LastUpdatedOn) 49 | assert.Equal(t, "1.3.1", mrf.Version) 50 | assert.Equal(t, "group", mrf.PlanMarketType) 51 | assert.Equal(t, "1234-5678-9012-3456", mrf.PlanID) 52 | assert.Equal(t, 
"planidtype", mrf.PlanIDType) 53 | assert.Equal(t, "root", mrf.RecordType) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/services.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package mrf 17 | 18 | import ( 19 | "context" 20 | "encoding/csv" 21 | "github.com/danielchalef/mrfparse/pkg/mrfparse/cloud" 22 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 23 | "io" 24 | 25 | mapset "github.com/deckarep/golang-set/v2" 26 | "github.com/spf13/viper" 27 | ) 28 | 29 | // loadServiceList loads a list of services from a csv file and returns a stringSet of the services. 30 | // The csv file is expected to have a header row, with first column being the 31 | // CPT/HCPCS service code, and subsequent columns being ignored. 
32 | func loadServiceList(uri string) StringSet { 33 | var f io.ReadCloser 34 | var err error 35 | var services StringSet = mapset.NewSet[string]() 36 | 37 | // if empty, get from config file 38 | if uri == "" { 39 | uri = viper.GetString("services.file") 40 | } 41 | 42 | f, err = cloud.NewReader(context.TODO(), uri) 43 | utils.ExitOnError(err) 44 | 45 | defer func(f io.ReadCloser) { 46 | err = f.Close() 47 | if err != nil { 48 | utils.ExitOnError(err) 49 | } 50 | }(f) 51 | 52 | csvReader := csv.NewReader(f) 53 | serviceData, err := csvReader.ReadAll() 54 | utils.ExitOnError(err) 55 | 56 | // extract the first column, the CPT/HCPCS code, from csv, 57 | // skipping the header row 58 | for _, s := range serviceData[1:] { // skip header 59 | services.Add(s[0]) 60 | } 61 | 62 | return services 63 | } 64 | -------------------------------------------------------------------------------- /pkg/mrfparse/mrf/services_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package mrf 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/alecthomas/assert/v2" 22 | ) 23 | 24 | // Test loadServices 25 | func TestLoadServices(t *testing.T) { 26 | services := loadServiceList("../../../data/test_services.csv") 27 | assert.Equal(t, 2, services.Cardinality()) 28 | 29 | assert.True(t, services.Contains("J0702")) 30 | assert.True(t, services.Contains("J1745")) 31 | } 32 | -------------------------------------------------------------------------------- /pkg/mrfparse/parquet/pq_writer_factory.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package parquet 17 | 18 | import ( 19 | "context" 20 | "fmt" 21 | "github.com/danielchalef/mrfparse/pkg/mrfparse/cloud" 22 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 23 | "io" 24 | 25 | "github.com/segmentio/parquet-go" 26 | "github.com/spf13/viper" 27 | ) 28 | 29 | // PqWriteCloser is a wrapper around parquet.GenericWriter and io.WriteCloser 30 | type PqWriteCloser struct { 31 | closer io.WriteCloser 32 | ctx context.Context 33 | writer *parquet.GenericWriter[*models.Mrf] 34 | uri string 35 | } 36 | 37 | // Close closes the underlying parquet.GenericWriter and the underlying io.WriteCloser. 
38 | func (pwc *PqWriteCloser) Close() error { 39 | err := pwc.writer.Close() 40 | if err != nil { 41 | return err 42 | } 43 | 44 | err = pwc.closer.Close() 45 | if err != nil { 46 | return err 47 | } 48 | 49 | return nil 50 | } 51 | 52 | // Write writes the given data to the underlying parquet.GenericWriter. 53 | func (pwc *PqWriteCloser) Write(rows []*models.Mrf) (int, error) { 54 | return pwc.writer.Write(rows) 55 | } 56 | 57 | // Flush flushes the underlying parquet.GenericWriter. 58 | func (pwc *PqWriteCloser) Flush() error { 59 | return pwc.writer.Flush() 60 | } 61 | 62 | // URI returns the composed URI of the underlying io.WriteCloser. 63 | func (pwc *PqWriteCloser) URI() string { 64 | return pwc.uri 65 | } 66 | 67 | // NewPqWriter creates a new PqWriteCloser. ctx is the context to use for the underlying io.WriteCloser, making it 68 | // possible to cancel the write operation. 69 | func NewPqWriter(ctx context.Context, uri string, maxRowsPerGroup int64) (*PqWriteCloser, error) { 70 | pqConfig := parquet.WriterConfig{Compression: &parquet.Zstd, MaxRowsPerRowGroup: maxRowsPerGroup} 71 | 72 | w, err := cloud.NewWriter(ctx, uri) 73 | if err != nil { 74 | return nil, err 75 | } 76 | 77 | writer := parquet.NewGenericWriter[*models.Mrf](w, &pqConfig) 78 | 79 | return &PqWriteCloser{uri: uri, writer: writer, closer: w, ctx: ctx}, nil 80 | } 81 | 82 | // PqWriterFactory is a factory for creating PqWriteClosers. 83 | // It is used to create a new PqWriteCloser when the number of rows written to the current PqWriteCloser exceeds 84 | // MaxRowsPerFile. The URI of the new PqWriteCloser is created by incrementing the fileIndex and formatting it 85 | // into the filenameTemplate. 86 | type PqWriterFactory struct { 87 | filenameTemplate string 88 | fileIndex int 89 | MaxRowsPerFile int 90 | MaxRowsPerGroup int64 91 | } 92 | 93 | // NewPqWriterFactory creates a new PqWriterFactory. filePrefix is the prefix of the filename (e.g. 
"mrf"), outputURI 94 | // is the URI of the output directory (e.g. "gs://bucket/output"). 95 | func NewPqWriterFactory(filePrefix, outputURI string) *PqWriterFactory { 96 | var ( 97 | DefaultMaxRowsPerFile = 100_000_000 98 | MaxRowsPerGroup int64 = 1_000_000 99 | DefaultOutputTemplate = "_%04d.zstd.parquet" 100 | ) 101 | 102 | if viper.IsSet("writer.max_rows_per_file") { 103 | DefaultMaxRowsPerFile = viper.GetInt("writer.max_rows_per_file") 104 | } 105 | 106 | if viper.IsSet("writer.filename_template") { 107 | DefaultOutputTemplate = viper.GetString("writer.filename_template") 108 | } 109 | 110 | if viper.IsSet("writer.max_rows_per_group") { 111 | MaxRowsPerGroup = viper.GetInt64("writer.max_rows_per_group") 112 | } 113 | 114 | filenameTemplate := cloud.JoinURI(outputURI, filePrefix) + DefaultOutputTemplate 115 | 116 | return &PqWriterFactory{ 117 | filenameTemplate: filenameTemplate, 118 | fileIndex: 0, 119 | MaxRowsPerFile: DefaultMaxRowsPerFile, 120 | MaxRowsPerGroup: MaxRowsPerGroup, 121 | } 122 | } 123 | 124 | // CreateWriter creates a new PqWriteCloser. fileIndex is incremented by each call to CreateWriter, and the 125 | // filenameTemplate is formatted with the new fileIndex to create the URI of the new PqWriteCloser. 126 | func (pwf *PqWriterFactory) CreateWriter(ctx context.Context) (*PqWriteCloser, error) { 127 | uri := fmt.Sprintf(pwf.filenameTemplate, pwf.fileIndex) 128 | pwf.fileIndex++ 129 | 130 | w, err := NewPqWriter(ctx, uri, pwf.MaxRowsPerGroup) 131 | if err != nil { 132 | return nil, err 133 | } 134 | 135 | return w, nil 136 | } 137 | -------------------------------------------------------------------------------- /pkg/mrfparse/parquet/pq_writer_factory_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package parquet 17 | 18 | import ( 19 | "context" 20 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 21 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 22 | "testing" 23 | 24 | "github.com/alecthomas/assert/v2" 25 | "github.com/spf13/viper" 26 | ) 27 | 28 | func TestPqWriteCloserURIIncrement(t *testing.T) { 29 | const ( 30 | expectedURIZero = "/tmp/mrf_0000.zstd.parquet" 31 | expectedURIOne = "/tmp/mrf_0001.zstd.parquet" 32 | ) 33 | 34 | pwf := NewPqWriterFactory("mrf", "/tmp/") 35 | pwc, err := pwf.CreateWriter(context.TODO()) 36 | assert.NoError(t, err) 37 | 38 | err = pwc.Close() 39 | assert.NoError(t, err) 40 | 41 | assert.Equal(t, expectedURIZero, pwc.URI()) 42 | 43 | pwc, err = pwf.CreateWriter(context.TODO()) 44 | assert.NoError(t, err) 45 | 46 | err = pwc.Close() 47 | assert.NoError(t, err) 48 | 49 | assert.Equal(t, expectedURIOne, pwc.URI()) 50 | } 51 | 52 | // test writing data to a parquet file 53 | func TestPqWriteCloserWrite(t *testing.T) { 54 | var mrfList = []*models.Mrf{{UUID: utils.GetUniqueID(), ParentUUID: utils.GetUniqueID()}, 55 | {UUID: utils.GetUniqueID(), ParentUUID: utils.GetUniqueID()}} 56 | 57 | pwf := NewPqWriterFactory("mrf", "/tmp/") 58 | pwc, err := pwf.CreateWriter(context.TODO()) 59 | assert.NoError(t, err) 60 | 61 | rows, err := pwc.Write(mrfList) 62 | assert.NoError(t, err) 63 | 64 | err = pwc.Close() 65 | assert.NoError(t, err) 66 | 67 | assert.Equal(t, 2, rows) 68 | } 69 | 70 | // test NewPqWriterFactory config 71 | func TestNewPqWriterFactoryConfig(t *testing.T) { 72 | const ( 
73 | expectedMaxRowsPerFile = 555 74 | expectedMaxRowsPerGroup = 666 75 | expectedOutputTemplate = "_mrf.parquet" 76 | ) 77 | 78 | viper.Set("writer.max_rows_per_file", expectedMaxRowsPerFile) 79 | viper.Set("writer.filename_template", expectedOutputTemplate) 80 | viper.Set("writer.max_rows_per_group", expectedMaxRowsPerGroup) 81 | 82 | pwf := NewPqWriterFactory("file", "/tmp/output") 83 | assert.Equal(t, expectedMaxRowsPerFile, pwf.MaxRowsPerFile) 84 | assert.Equal(t, expectedMaxRowsPerGroup, pwf.MaxRowsPerGroup) 85 | assert.Equal(t, "/tmp/output/file"+expectedOutputTemplate, pwf.filenameTemplate) 86 | } 87 | -------------------------------------------------------------------------------- /pkg/mrfparse/parquet/writer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package parquet 17 | 18 | import ( 19 | "context" 20 | "github.com/danielchalef/mrfparse/pkg/mrfparse/models" 21 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 22 | ) 23 | 24 | var log = utils.GetLogger() 25 | 26 | // Writer is intended to run as a goroutine, writing data to parquet files. The wc channel 27 | // receives slices of Mrf structs. Send true to the done channel to signal that no more 28 | // data will be sent to wc and that the writer should close the current file and exit. 
29 | // 30 | // Writer will create a new file when the number of rows written to the current file 31 | // exceeds the WriterFactory's MaxRowsPerFile. 32 | func Writer(filePrefix, outputURI string, wc <-chan []*models.Mrf, done <-chan bool) { 33 | var ( 34 | data []*models.Mrf 35 | i int 36 | rowCnt int 37 | err error 38 | wf = NewPqWriterFactory(filePrefix, outputURI) 39 | writer *PqWriteCloser 40 | ctx = context.Background() 41 | ) 42 | 43 | W: 44 | for { 45 | select { 46 | case data = <-wc: 47 | if i%wf.MaxRowsPerFile == 0 { 48 | if writer != nil { 49 | err = writer.Close() 50 | utils.ExitOnError(err) 51 | 52 | log.Debugf("Closed writer for %s", writer.URI()) 53 | } 54 | 55 | writer, err = wf.CreateWriter(ctx) 56 | utils.ExitOnError(err) 57 | } 58 | 59 | rowCnt, err = writer.Write(data) 60 | utils.ExitOnError(err) 61 | 62 | if i%50_000 == 0 { 63 | log.Debug("Wrote ", i, " rows.") 64 | // We see slightly less memory usage and faster run times when periodically 65 | // flushing the writer. 66 | err = writer.Flush() 67 | utils.ExitOnError(err) 68 | } 69 | i += rowCnt 70 | 71 | case <-done: 72 | err = writer.Close() 73 | utils.ExitOnError(err) 74 | break W 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /pkg/mrfparse/pipeline/framework.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package pipeline 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 20 | ) 21 | 22 | // A very simple composable pipeline framework. Steps are added to a pipeline and then run in order. 23 | // Each step is timed and logged. 24 | 25 | var log = utils.GetLogger() 26 | 27 | // Step is an interface that defines a pipeline step. Name() returns the step name. 28 | type Step interface { 29 | Name() string 30 | Run() 31 | } 32 | 33 | type Pipeline struct { 34 | Steps []Step 35 | } 36 | 37 | func (p *Pipeline) AddStep(step Step) { 38 | p.Steps = append(p.Steps, step) 39 | } 40 | 41 | // Run executes each step in the pipeline in the order of the Steps slice. Each step is timed and logged. 42 | // No effort is made to manage errors or recover from them. Each step is responsible for handling errors. 43 | func (p *Pipeline) Run() { 44 | var fn func() 45 | 46 | for _, step := range p.Steps { 47 | log.Infof("Running step: %s", step.Name()) 48 | 49 | fn = func() { step.Run() } 50 | elapsed := utils.Timed(fn) 51 | log.Infof("Step %s completed in %d seconds", step.Name(), elapsed) 52 | } 53 | } 54 | 55 | // New creates a new pipeline with the provided steps. 56 | func New(steps ...Step) *Pipeline { 57 | return &Pipeline{Steps: steps} 58 | } 59 | -------------------------------------------------------------------------------- /pkg/mrfparse/pipeline/steps.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package pipeline 17 | 18 | import ( 19 | "io" 20 | "os" 21 | "path/filepath" 22 | "strings" 23 | 24 | "github.com/danielchalef/mrfparse/pkg/mrfparse/http" 25 | "github.com/danielchalef/mrfparse/pkg/mrfparse/mrf" 26 | "github.com/danielchalef/mrfparse/pkg/mrfparse/split" 27 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 28 | 29 | "github.com/spf13/viper" 30 | ) 31 | 32 | // NewParsePipeline returns a pipeline that splits the input file, parses the 33 | // split files, and then cleans up afterwards. 34 | // 35 | // InputPath is the path to the input JSON object file. 36 | // OutputPath is the path to the output parquet fileset. 37 | // ServiceFile is the path to the HCPCS/CPT service file in CSV format. 38 | // PlanID is the plan ID to use for the parquet fileset. 39 | // 40 | // The pipeline uses a tmp path to store the intermediate split files. The tmp 41 | // path ican be configured in the config file, an enrivonment variable, or a 42 | // default system tmp path will be used. 
43 | func NewParsePipeline(inputPath, outputPath, serviceFile string, planID int64) *Pipeline { 44 | var ( 45 | err error 46 | tmpPath string 47 | srcFilePath string 48 | tmpPathSrc string 49 | tmpPathSplit string 50 | steps []Step 51 | cfgTmpPath = viper.GetString("tmp.path") 52 | ) 53 | 54 | if cfgTmpPath != "" { 55 | tmpPath, err = os.MkdirTemp(cfgTmpPath, "mrfparse") 56 | } else { 57 | tmpPath, err = os.MkdirTemp("", "mrfparse") 58 | } 59 | 60 | utils.ExitOnError(err) 61 | 62 | tmpPathSrc = filepath.Join(tmpPath, "src") 63 | tmpPathSplit = filepath.Join(tmpPath, "split") 64 | 65 | srcFilePath = filepath.Join(tmpPathSrc, filepath.Base(inputPath)) 66 | srcFilePath = strings.Split(srcFilePath, "?")[0] 67 | 68 | steps = []Step{ 69 | &DownloadStep{ 70 | URL: inputPath, 71 | OutputPath: srcFilePath, 72 | }, 73 | &SplitStep{ 74 | InputPath: srcFilePath, 75 | OutputPath: tmpPathSplit, 76 | Overwrite: true, 77 | }, 78 | &ParseStep{ 79 | InputPath: tmpPathSplit, 80 | OutputPath: outputPath, 81 | ServiceFile: serviceFile, 82 | PlanID: planID, 83 | }, 84 | &CleanStep{ 85 | TmpPath: tmpPath, 86 | }, 87 | } 88 | 89 | return New(steps...) 
90 | } 91 | 92 | // DownloadStep downloads a file from a URL to a local path using http.DownloadReader 93 | type DownloadStep struct { 94 | URL string 95 | OutputPath string 96 | } 97 | 98 | func (s *DownloadStep) Run() { 99 | var ( 100 | o string 101 | rd io.ReadCloser 102 | wr io.WriteCloser 103 | err error 104 | n int64 105 | ) 106 | 107 | o = filepath.Dir(s.OutputPath) 108 | 109 | err = os.MkdirAll(o, 0o755) 110 | utils.ExitOnError(err) 111 | 112 | rd, err = http.DownloadReader(s.URL) 113 | utils.ExitOnError(err) 114 | 115 | defer rd.Close() 116 | 117 | wr, err = os.Create(s.OutputPath) 118 | utils.ExitOnError(err) 119 | 120 | defer wr.Close() 121 | 122 | n, err = io.Copy(wr, rd) 123 | utils.ExitOnError(err) 124 | 125 | log.Infof("Downloaded %d bytes from %s to %s", n, s.URL, s.OutputPath) 126 | } 127 | 128 | func (s *DownloadStep) Name() string { 129 | return "Download" 130 | } 131 | 132 | // SplitStep splits the input JSON object file into NDJSON files using split.File 133 | type SplitStep struct { 134 | InputPath string 135 | OutputPath string 136 | Overwrite bool 137 | } 138 | 139 | func (s *SplitStep) Run() { 140 | split.File(s.InputPath, s.OutputPath, s.Overwrite) 141 | } 142 | 143 | func (s *SplitStep) Name() string { 144 | return "Split" 145 | } 146 | 147 | // ParseStep parses the split NDJSON files into a parquet fileset using mrf.Parse 148 | type ParseStep struct { 149 | InputPath string 150 | OutputPath string 151 | ServiceFile string 152 | PlanID int64 153 | } 154 | 155 | func (s *ParseStep) Run() { 156 | mrf.Parse(s.InputPath, s.OutputPath, s.PlanID, s.ServiceFile) 157 | } 158 | 159 | func (s *ParseStep) Name() string { 160 | return "Parse" 161 | } 162 | 163 | // CleanStep removes the tmp directory used to store the split files 164 | type CleanStep struct { 165 | TmpPath string 166 | } 167 | 168 | func (s *CleanStep) Run() { 169 | err := os.RemoveAll(s.TmpPath) 170 | utils.ExitOnError(err) 171 | } 172 | 173 | func (s *CleanStep) Name() string { 174 
| return "Clean" 175 | } 176 | -------------------------------------------------------------------------------- /pkg/mrfparse/pipeline/steps_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package pipeline 17 | 18 | import ( 19 | "os" 20 | "strings" 21 | "testing" 22 | 23 | "github.com/alecthomas/assert/v2" 24 | "github.com/spf13/viper" 25 | ) 26 | 27 | func TestNewParsePipeline(t *testing.T) { 28 | inputPath := "http://server.com/somepath/input.gz?somestuff" 29 | outputPath := "output" 30 | serviceFile := "service.csv" 31 | planID := int64(1) 32 | 33 | viper.Set("tmp.path", "/tmp") 34 | 35 | p := NewParsePipeline(inputPath, outputPath, serviceFile, planID) 36 | assert.Equal(t, len(p.Steps), 4) 37 | 38 | downloadStep, ok := p.Steps[0].(*DownloadStep) 39 | assert.True(t, ok) 40 | 41 | assert.Equal(t, downloadStep.URL, inputPath) 42 | assert.True(t, strings.HasPrefix(downloadStep.OutputPath, "/tmp")) 43 | 44 | tmpPath := downloadStep.OutputPath 45 | assert.False(t, strings.Contains(tmpPath, "?")) 46 | 47 | splitStep, ok := p.Steps[1].(*SplitStep) 48 | assert.True(t, ok) 49 | assert.Equal(t, splitStep.InputPath, tmpPath) 50 | assert.True(t, splitStep.Overwrite) 51 | 52 | parseStep, ok := p.Steps[2].(*ParseStep) 53 | assert.True(t, ok) 54 | assert.Equal(t, parseStep.OutputPath, outputPath) 55 | assert.Equal(t, 
parseStep.ServiceFile, serviceFile) 56 | assert.Equal(t, parseStep.PlanID, planID) 57 | 58 | cleanupStep, ok := p.Steps[3].(*CleanStep) 59 | assert.True(t, ok) 60 | assert.True(t, strings.HasPrefix(tmpPath, cleanupStep.TmpPath)) 61 | 62 | err := os.RemoveAll(tmpPath) 63 | assert.NoError(t, err) 64 | } 65 | -------------------------------------------------------------------------------- /pkg/mrfparse/split/split.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package split 17 | 18 | import ( 19 | "github.com/danielchalef/mrfparse/pkg/mrfparse/utils" 20 | 21 | "github.com/danielchalef/jsplit/pkg/jsplit" 22 | ) 23 | 24 | // File splits a JSON document into multiple files. 25 | // It produces a root.json file for field elements in the root of the document, and 26 | // a file for each array element in the document root. Files are limited to 4GB each. 
// Sha256Sum returns the lowercase hex encoding of the SHA-256 digest of s.
// It is used as a convenience fingerprint, not as a security primitive.
func Sha256Sum(s string) string {
	digest := sha256.Sum256([]byte(s))

	return hex.EncodeToString(digest[:])
}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package utils 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | // test Sha256Sum 25 | func TestSha256Sum(t *testing.T) { 26 | s := "filename_test.gz" 27 | hash_expected := "cc13984a42a92b86c46c861655e91bda947325361fe6427a611be61053366877" 28 | 29 | hash := Sha256Sum(s) 30 | assert.Equal(t, hash_expected, hash) 31 | } 32 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/error.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
// Filter returns a new slice holding, in their original order, the elements of
// slice for which f reports true. The result is nil when no element matches.
func Filter[T any](slice []T, f func(T) bool) []T {
	var kept []T

	for i := range slice {
		if elem := slice[i]; f(elem) {
			kept = append(kept, elem)
		}
	}

	return kept
}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package utils 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | // test Filter function 25 | 26 | func TestFilterInt(t *testing.T) { 27 | var slice = []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} 28 | 29 | var even = Filter(slice, func(i int) bool { 30 | return i%2 == 0 31 | }) 32 | 33 | assert.Equal(t, []int{2, 4, 6, 8, 10}, even) 34 | } 35 | 36 | func TestFilterString(t *testing.T) { 37 | var slice = []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"} 38 | 39 | var eplus = Filter(slice, func(s string) bool { 40 | return s > "d" 41 | }) 42 | 43 | assert.Equal(t, []string{"e", "f", "g", "h", "i", "j"}, eplus) 44 | } 45 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/json.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package utils 17 | 18 | import ( 19 | "errors" 20 | "fmt" 21 | 22 | "github.com/minio/simdjson-go" 23 | ) 24 | 25 | // GetElementValue extracts the value at the given path in the Iter. Supports string, int64, float64 types. 26 | func GetElementValue[T string | int64 | float64](path string, iter *simdjson.Iter) (T, error) { 27 | var ret T 28 | 29 | e, err := iter.FindElement(nil, path) 30 | if err != nil { 31 | log.Tracef("Failed to find element: %s - %v", path, err) 32 | return ret, err 33 | } 34 | 35 | switch p := any(&ret).(type) { 36 | case *string: 37 | *p, err = e.Iter.StringCvt() 38 | 39 | case *int64: 40 | *p, err = e.Iter.Int() 41 | 42 | case *float64: 43 | *p, err = e.Iter.Float() 44 | } 45 | 46 | return ret, err 47 | } 48 | 49 | // GetArrayElementAsSlice extracts the array at the given path in the Iter, returning the array as a slice. 50 | // Supports string, int64, float64 slices. 51 | func GetArrayElementAsSlice[T string | int64 | float64](path string, iter *simdjson.Iter) ([]T, error) { 52 | var ret []T 53 | 54 | a, err := GetArrayForElement(path, iter) 55 | if err != nil { 56 | return ret, err 57 | } 58 | 59 | switch p := any(&ret).(type) { 60 | case *[]string: 61 | *p, err = a.AsStringCvt() 62 | 63 | case *[]int64: 64 | *p, err = a.AsInteger() 65 | 66 | case *[]float64: 67 | *p, err = a.AsFloat() 68 | } 69 | 70 | return ret, err 71 | } 72 | 73 | // GetArrayForElement extracts the array at the given path in the Iter, returning the simdjson.Array 74 | func GetArrayForElement(path string, iter *simdjson.Iter) (*simdjson.Array, error) { 75 | e, err := iter.FindElement(nil, path) 76 | if err != nil { 77 | log.Tracef("Failed to find array element: %s - %v", path, err) 78 | return nil, err 79 | } 80 | 81 | a, err := e.Iter.Array(nil) 82 | if err != nil { 83 | return nil, err 84 | } 85 | 86 | return a, nil 87 | } 88 | 89 | // TestElementNotPresent evaluates simdjson error for ErrPathNotFound and exits if the error is something else. 
90 | // Otherwise, returns true if the element is missing. 91 | func TestElementNotPresent(err error, path string) bool { 92 | if err != nil { 93 | if errors.Is(err, simdjson.ErrPathNotFound) { 94 | log.Tracef("Element not found: %s", path) 95 | return true 96 | } 97 | 98 | ExitOnError(err) 99 | } 100 | 101 | return false 102 | } 103 | 104 | // CheckCPU checks that we're running on a CPU that supports the required SIMD instructions 105 | func CheckCPU() { 106 | if !simdjson.SupportedCPU() { 107 | ExitOnError(fmt.Errorf("unsupported cpu")) 108 | } 109 | } 110 | 111 | // ParseJSON parses []byte as Json document, while string is assumed to be NDJson 112 | func ParseJSON[T *[]byte | *string](s T, r *simdjson.ParsedJson) (*simdjson.ParsedJson, error) { 113 | switch p := any(s).(type) { 114 | case *[]byte: 115 | return simdParse(*p, r) 116 | case *string: 117 | return simdParseND([]byte(*p), r) 118 | default: 119 | return nil, fmt.Errorf("invalid type") 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/json_amd64.go: -------------------------------------------------------------------------------- 1 | //go:build amd64 && (linux || darwin) 2 | 3 | /* 4 | Copyright © 2023 Daniel Chalef 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | package utils 19 | 20 | import "github.com/minio/simdjson-go" 21 | 22 | func simdParse(b []byte, r *simdjson.ParsedJson) (*simdjson.ParsedJson, error) { 23 | return simdjson.Parse(b, r) 24 | } 25 | 26 | func simdParseND(b []byte, r *simdjson.ParsedJson) (*simdjson.ParsedJson, error) { 27 | return simdjson.ParseND(b, r) 28 | } 29 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/json_other.go: -------------------------------------------------------------------------------- 1 | //go:build !amd64 || !(linux || darwin) 2 | 3 | /* 4 | Copyright © 2023 Daniel Chalef 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | package utils 19 | 20 | import ( 21 | "github.com/kiwicom/fakesimdjson" 22 | "github.com/minio/simdjson-go" 23 | ) 24 | 25 | func simdParse(b []byte, _ *simdjson.ParsedJson) (*simdjson.ParsedJson, error) { 26 | return fakesimdjson.Parse(b) 27 | } 28 | 29 | func simdParseND(b []byte, _ *simdjson.ParsedJson) (*simdjson.ParsedJson, error) { 30 | return fakesimdjson.ParseND(b) 31 | } 32 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/json_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package utils 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | func TestGetElementValue(t *testing.T) { 25 | js := []byte(`{"string_val": "12345", "int_val": 12345, "float_val": 0.12345}`) 26 | 27 | jp, err := ParseJSON(&js, nil) 28 | assert.NoError(t, err) 29 | 30 | iter := jp.Iter() 31 | 32 | // test string 33 | var retString string 34 | retString, err = GetElementValue[string]("string_val", &iter) 35 | assert.NoError(t, err) 36 | assert.Equal(t, "12345", retString) 37 | 38 | // test int64 39 | var retInt int64 40 | retInt, err = GetElementValue[int64]("int_val", &iter) 41 | assert.NoError(t, err) 42 | assert.Equal(t, int64(12345), retInt) 43 | 44 | // test float64 45 | var retFloat float64 46 | retFloat, err = GetElementValue[float64]("float_val", &iter) 47 | assert.NoError(t, err) 48 | assert.Equal(t, 0.12345, retFloat) 49 | 50 | // Test string with float value (we see provider_group_id as floats in MRFs, but they should be strings) 51 | retString, err = GetElementValue[string]("float_val", &iter) 52 | assert.NoError(t, err) 53 | assert.Equal(t, "0.12345", retString) 54 | } 55 | 56 | // test GetArrayElementAsSlice 57 | func TestGetArrayElementAsSlice(t *testing.T) { 58 | js := []byte(`{"string_vals": ["12345", "54321"], "int_vals": [12345, 54321], "float_vals": [0.12345, 0.54321]}`) 59 | jp, err := ParseJSON(&js, nil) 60 | assert.NoError(t, err) 61 | 62 | iter := jp.Iter() 63 | 64 | // test string 65 | var retString []string 66 | retString, err = 
GetArrayElementAsSlice[string]("string_vals", &iter) 67 | assert.NoError(t, err) 68 | assert.Equal(t, []string{"12345", "54321"}, retString) 69 | 70 | // test int64 71 | var retInt []int64 72 | retInt, err = GetArrayElementAsSlice[int64]("int_vals", &iter) 73 | assert.NoError(t, err) 74 | assert.Equal(t, []int64{12345, 54321}, retInt) 75 | 76 | // test float64 77 | var retFloat []float64 78 | retFloat, err = GetArrayElementAsSlice[float64]("float_vals", &iter) 79 | assert.NoError(t, err) 80 | assert.Equal(t, []float64{0.12345, 0.54321}, retFloat) 81 | } 82 | 83 | func TestTestElementNotPresent(t *testing.T) { 84 | js := []byte(`{"string_val": "12345", "int_val": 12345, "float_vals": [0.12345, 0.54321]}`) 85 | 86 | jp, err := ParseJSON(&js, nil) 87 | assert.NoError(t, err) 88 | 89 | iter := jp.Iter() 90 | 91 | path := "float_vals" 92 | _, err = iter.FindElement(nil, path) 93 | r := TestElementNotPresent(err, path) 94 | assert.False(t, r) 95 | 96 | path = "int_val" 97 | _, err = iter.FindElement(nil, path) 98 | r = TestElementNotPresent(err, path) 99 | assert.False(t, r) 100 | 101 | path = "missing_val" 102 | _, err = iter.FindElement(nil, path) 103 | r = TestElementNotPresent(err, path) 104 | assert.True(t, r) 105 | } 106 | 107 | // test GetArrayIterForElement 108 | func TestGetArrayIterForElement(t *testing.T) { 109 | js := []byte(`{"string_vals": ["abc", "def"], "int_vals": [12345, 54321], "float_vals": [0.12345, 0.54321]}`) 110 | path := "string_vals" 111 | jp, err := ParseJSON(&js, nil) 112 | assert.NoError(t, err) 113 | 114 | iter := jp.Iter() 115 | 116 | // test string 117 | a, err := GetArrayForElement(path, &iter) 118 | assert.NoError(t, err) 119 | 120 | ret, err := a.AsString() 121 | assert.NoError(t, err) 122 | 123 | expectedRet := []string{"abc", "def"} 124 | 125 | assert.Equal(t, expectedRet, ret) 126 | 127 | } 128 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/logger.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package utils 17 | 18 | import ( 19 | "os" 20 | "sync" 21 | 22 | "github.com/sirupsen/logrus" 23 | "github.com/spf13/viper" 24 | ) 25 | 26 | var once sync.Once 27 | var logger *logrus.Logger 28 | 29 | var log = GetLogger() 30 | 31 | func GetLogger() *logrus.Logger { 32 | var level logrus.Level 33 | 34 | level = logrus.InfoLevel 35 | 36 | if viper.IsSet("log.level") { 37 | switch viper.GetString("log.level") { 38 | case "debug": 39 | level = logrus.DebugLevel 40 | case "warn": 41 | level = logrus.WarnLevel 42 | case "error": 43 | level = logrus.ErrorLevel 44 | case "trace": 45 | level = logrus.TraceLevel 46 | } 47 | } 48 | 49 | // Use a singleton so we can update log level once config is loaded 50 | once.Do(func() { 51 | logger = logrus.New() 52 | }) 53 | 54 | logger.Out = os.Stdout 55 | logger.SetLevel(level) 56 | 57 | logger.SetFormatter(&logrus.TextFormatter{ 58 | DisableColors: false, 59 | FullTimestamp: true, 60 | }) 61 | 62 | return logger 63 | } 64 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/logger_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you 
may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package utils 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/sirupsen/logrus" 22 | "github.com/spf13/viper" 23 | "github.com/stretchr/testify/assert" 24 | ) 25 | 26 | // test logger config 27 | func TestLoggerConfig(t *testing.T) { 28 | log := GetLogger() 29 | assert.Equal(t, log.Level, logrus.InfoLevel) 30 | 31 | viper.Set("log.level", "debug") 32 | log = GetLogger() 33 | assert.Equal(t, log.Level, logrus.DebugLevel) 34 | 35 | viper.Set("log.level", "info") 36 | log = GetLogger() 37 | assert.Equal(t, log.Level, logrus.InfoLevel) 38 | 39 | viper.Set("log.level", "error") 40 | log = GetLogger() 41 | assert.Equal(t, log.Level, logrus.ErrorLevel) 42 | 43 | viper.Set("log.level", "warn") 44 | log = GetLogger() 45 | assert.Equal(t, log.Level, logrus.WarnLevel) 46 | } 47 | -------------------------------------------------------------------------------- /pkg/mrfparse/utils/timer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 Daniel Chalef 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// wrapped is a function taking no arguments and returning nothing, whose
// execution can be timed with Timed.
type wrapped func()

// Timed runs fn and returns the elapsed time in whole seconds, truncated
// towards zero.
//
// The elapsed time is taken with time.Since on a single time.Now reading, so
// it is based on the monotonic clock and a call shorter than one second always
// reports 0, even when it straddles a wall-clock second boundary.
func Timed(fn wrapped) int64 {
	start := time.Now()

	fn()

	return int64(time.Since(start) / time.Second)
}

// TimedOperation is an operation whose execution time can be measured.
// The value it returns is ignored by timeOperation.
type TimedOperation func() time.Duration

// timeOperation performs the given operation and returns how long the call
// took as a time.Duration.
func timeOperation(op TimedOperation) time.Duration {
	start := time.Now()

	op()

	return time.Since(start)
}

// MeasureExecutionTimes runs op the given number of times and returns the
// mean, median and variance of the observed execution times.
//
// mean and median are in nanoseconds; variance is the population variance
// (sum of squared deviations divided by iterations) in nanoseconds squared.
// All three results are truncated to int64. For an even number of iterations
// the median is the midpoint of the two middle samples.
//
// When iterations is zero or negative, op is never called and all results
// are 0.
func MeasureExecutionTimes(op TimedOperation, iterations int) (mean, median, variance int64) {
	// Nothing to measure; also avoids indexing into an empty sample set.
	if iterations <= 0 {
		return 0, 0, 0
	}

	times := make([]int64, iterations)

	// Perform the operation and record the execution time for each iteration.
	var sum int64
	for i := range times {
		times[i] = timeOperation(op).Nanoseconds()
		sum += times[i]
	}

	// Calculate the mean execution time.
	meanF := float64(sum) / float64(iterations)
	mean = int64(meanF)

	// Calculate the variance of the execution times.
	var varianceF float64
	for _, t := range times {
		varianceF += math.Pow(float64(t)-meanF, 2)
	}

	varianceF /= float64(iterations)
	variance = int64(varianceF)

	// Calculate the median execution time.
	sort.Slice(times, func(i, j int) bool { return times[i] < times[j] })

	mid := iterations / 2
	median = times[mid]

	if iterations%2 == 0 {
		// Even sample count: take the midpoint of the two middle values,
		// written so the intermediate sum cannot overflow.
		lower := times[mid-1]
		median = lower + (times[mid]-lower)/2
	}

	return mean, median, variance
}