├── .dockerignore ├── .github └── workflows │ ├── build.yml │ ├── checks.yml │ ├── docs.yml │ ├── release.yml │ ├── tests.yml │ └── update_dev_docs.yml ├── .gitignore ├── CNAME ├── LICENSE ├── Makefile ├── README.md ├── cmd └── greenmask │ ├── cmd │ ├── delete │ │ ├── delete_dump.go │ │ └── domains.go │ ├── dump │ │ └── dump.go │ ├── list_dumps │ │ └── list_dumps.go │ ├── list_transformers │ │ └── list_transformers.go │ ├── restore │ │ └── restore.go │ ├── root.go │ ├── show_dump │ │ └── show_dump.go │ ├── show_transformer │ │ └── show_transformer.go │ └── validate │ │ └── validate.go │ └── main.go ├── config.yml.example ├── docker-compose-integration.yml ├── docker-compose.yml ├── docker ├── greenmask │ └── Dockerfile ├── integration │ ├── filldb │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── filldb.sh │ │ └── generated.sql │ └── tests │ │ └── Dockerfile └── playground │ └── filldb │ ├── Dockerfile │ └── filldb.sh ├── docs ├── architecture.md ├── assets │ ├── built_in_transformers │ │ ├── img.png │ │ ├── orders-schema.png │ │ └── person-person-schema.png │ ├── list_dumps_screen.png │ ├── list_transformers_screen_1.png │ ├── list_transformers_screen_2.png │ ├── logo.png │ ├── schema.png │ ├── show_transformer.png │ ├── validate_horizontal_diff.png │ └── validate_vertical_diff.png ├── built_in_transformers │ ├── advanced_transformers │ │ ├── custom_functions │ │ │ ├── core_functions.md │ │ │ ├── faker_function.md │ │ │ └── index.md │ │ ├── index.md │ │ ├── json.md │ │ ├── template.md │ │ └── template_record.md │ ├── dynamic_parameters.md │ ├── index.md │ ├── parameters_templating.md │ ├── standard_transformers │ │ ├── cmd.md │ │ ├── dict.md │ │ ├── hash.md │ │ ├── index.md │ │ ├── masking.md │ │ ├── noise_date.md │ │ ├── noise_float.md │ │ ├── noise_int.md │ │ ├── noise_numeric.md │ │ ├── random_amount_with_currency.md │ │ ├── random_bool.md │ │ ├── random_cc_number.md │ │ ├── random_cc_type.md │ │ ├── random_century.md │ │ ├── random_choice.md │ │ ├── 
random_company.md │ │ ├── random_currency.md │ │ ├── random_date.md │ │ ├── random_day_of_month.md │ │ ├── random_day_of_week.md │ │ ├── random_domain_name.md │ │ ├── random_e164_phone_number.md │ │ ├── random_email.md │ │ ├── random_float.md │ │ ├── random_int.md │ │ ├── random_ip.md │ │ ├── random_latitude.md │ │ ├── random_longitude.md │ │ ├── random_mac.md │ │ ├── random_month_name.md │ │ ├── random_numeric.md │ │ ├── random_paragraph.md │ │ ├── random_password.md │ │ ├── random_person.md │ │ ├── random_phone_number.md │ │ ├── random_sentence.md │ │ ├── random_string.md │ │ ├── random_timezone.md │ │ ├── random_toll_free_phone_number.md │ │ ├── random_unix_timestamp.md │ │ ├── random_url.md │ │ ├── random_username.md │ │ ├── random_uuid.md │ │ ├── random_word.md │ │ ├── random_year_string.md │ │ ├── real_address.md │ │ ├── regexp_replace.md │ │ ├── replace.md │ │ └── set_null.md │ ├── transformation_condition.md │ ├── transformation_engines.md │ └── transformation_inheritance.md ├── commands │ ├── delete.md │ ├── dump.md │ ├── index.md │ ├── list-dumps.md │ ├── list-transformers.md │ ├── restore.md │ ├── show-dump.md │ ├── show-transformer.md │ └── validate.md ├── configuration.md ├── database_subset.md ├── index.md ├── installation.md ├── overrides │ └── main.html ├── playground.md └── release_notes │ ├── greenmask_0_1_0.md │ ├── greenmask_0_1_0_beta.md │ ├── greenmask_0_1_1.md │ ├── greenmask_0_1_10.md │ ├── greenmask_0_1_11.md │ ├── greenmask_0_1_12.md │ ├── greenmask_0_1_13.md │ ├── greenmask_0_1_14.md │ ├── greenmask_0_1_2.md │ ├── greenmask_0_1_3.md │ ├── greenmask_0_1_4.md │ ├── greenmask_0_1_5.md │ ├── greenmask_0_1_6.md │ ├── greenmask_0_1_7.md │ ├── greenmask_0_1_8.md │ ├── greenmask_0_1_9.md │ ├── greenmask_0_2_0.md │ ├── greenmask_0_2_0_b1.md │ ├── greenmask_0_2_0_b2.md │ ├── greenmask_0_2_1.md │ ├── greenmask_0_2_10.md │ ├── greenmask_0_2_11.md │ ├── greenmask_0_2_12.md │ ├── greenmask_0_2_2.md │ ├── greenmask_0_2_3.md │ ├── greenmask_0_2_4.md │ 
├── greenmask_0_2_5.md │ ├── greenmask_0_2_6.md │ ├── greenmask_0_2_7.md │ ├── greenmask_0_2_8.md │ └── greenmask_0_2_9.md ├── go.mod ├── go.sum ├── internal ├── db │ └── postgres │ │ ├── cmd │ │ ├── dump.go │ │ ├── restore.go │ │ ├── show_dump.go │ │ ├── validate.go │ │ └── validate_utils │ │ │ ├── json_document.go │ │ │ ├── json_document_test.go │ │ │ ├── text_document.go │ │ │ └── utils.go │ │ ├── context │ │ ├── config_builder.go │ │ ├── config_builder_test.go │ │ ├── context.go │ │ ├── context_test.go │ │ ├── pg_catalog.go │ │ ├── pg_catalog_test.go │ │ ├── queries.go │ │ ├── schema.go │ │ ├── tables_introspection.go │ │ ├── transformers.go │ │ ├── types.go │ │ └── virtual_references.go │ │ ├── dumpers │ │ ├── dumpers.go │ │ ├── errors.go │ │ ├── large_object.go │ │ ├── pipeliner.go │ │ ├── plain_dump_pipeline.go │ │ ├── sequence.go │ │ ├── table.go │ │ ├── transformation_pipeline.go │ │ ├── transformation_pipeline_test.go │ │ ├── transformation_window.go │ │ ├── transformation_window_test.go │ │ └── validation_pipeline.go │ │ ├── entries │ │ ├── entry_producer.go │ │ ├── large_object.go │ │ ├── sequence.go │ │ ├── sequence_test.go │ │ └── table.go │ │ ├── pgcopy │ │ ├── common.go │ │ ├── decoder.go │ │ ├── decoder_test.go │ │ ├── encoder.go │ │ ├── encoder_test.go │ │ ├── integration_test.go │ │ ├── row.go │ │ └── row_test.go │ │ ├── pgdump │ │ ├── adapter.go │ │ ├── adapter_test.go │ │ └── pgdump.go │ │ ├── pgrestore │ │ ├── pgrestore.go │ │ └── script.go │ │ ├── restorers │ │ ├── base.go │ │ ├── base_test.go │ │ ├── blobs.go │ │ ├── blobs_test.go │ │ ├── sequence.go │ │ ├── sequence_test.go │ │ ├── table.go │ │ ├── table_insert_format.go │ │ ├── table_insert_format_test.go │ │ └── table_test.go │ │ ├── storage │ │ └── metadata_json.go │ │ ├── subset │ │ ├── component.go │ │ ├── component_link.go │ │ ├── component_test.go │ │ ├── condenced_edge.go │ │ ├── cte.go │ │ ├── cycle_edge.go │ │ ├── edge.go │ │ ├── graph.go │ │ ├── path.go │ │ ├── query.go │ │ ├── 
scope_edge.go │ │ ├── set_queries.go │ │ └── table_link.go │ │ ├── toc │ │ ├── common.go │ │ ├── entry.go │ │ ├── header.go │ │ ├── reader.go │ │ ├── toc.go │ │ ├── utils.go │ │ └── writer.go │ │ ├── transformers │ │ ├── cmd.go │ │ ├── column_context.go │ │ ├── custom │ │ │ ├── bootatrsp_transformers.go │ │ │ ├── custom_cmd.go │ │ │ ├── custom_cmd_test.go │ │ │ ├── custom_transformer_definition.go │ │ │ ├── dynamic_definition.go │ │ │ ├── dynamic_definition_test.go │ │ │ └── test │ │ │ │ └── example_transformer.sh │ │ ├── data.go │ │ ├── default_params.go │ │ ├── dict.go │ │ ├── dict_test.go │ │ ├── email.go │ │ ├── email_test.go │ │ ├── hash.go │ │ ├── hash_test.go │ │ ├── json.go │ │ ├── json_context.go │ │ ├── json_test.go │ │ ├── masking.go │ │ ├── masking_test.go │ │ ├── noise_date.go │ │ ├── noise_date_test.go │ │ ├── noise_float.go │ │ ├── noise_float_test.go │ │ ├── noise_int.go │ │ ├── noise_int_test.go │ │ ├── noise_numeric.go │ │ ├── noise_numeric_test.go │ │ ├── random_bool.go │ │ ├── random_bool_test.go │ │ ├── random_choice.go │ │ ├── random_choice_test.go │ │ ├── random_company.go │ │ ├── random_company_test.go │ │ ├── random_date.go │ │ ├── random_date_test.go │ │ ├── random_faker.go │ │ ├── random_float.go │ │ ├── random_float_test.go │ │ ├── random_int.go │ │ ├── random_int_test.go │ │ ├── random_ip.go │ │ ├── random_ip_test.go │ │ ├── random_mac.go │ │ ├── random_mac_test.go │ │ ├── random_numeric.go │ │ ├── random_numeric_test.go │ │ ├── random_person.go │ │ ├── random_person_test.go │ │ ├── random_string.go │ │ ├── random_string_test.go │ │ ├── random_unix_timestamp.go │ │ ├── random_unix_timestamp_test.go │ │ ├── random_uuid.go │ │ ├── random_uuid_test.go │ │ ├── real_address.go │ │ ├── real_address_test.go │ │ ├── regexp_replace.go │ │ ├── regexp_replace_test.go │ │ ├── replace.go │ │ ├── replace_test.go │ │ ├── set_null.go │ │ ├── set_null_test.go │ │ ├── template.go │ │ ├── template_record.go │ │ ├── template_record_test.go │ │ ├── 
template_test.go │ │ ├── tesing_helpers_test.go │ │ ├── test │ │ │ └── cmd_test.sh │ │ ├── utils.go │ │ └── utils │ │ │ ├── cmd_transformer_base.go │ │ │ ├── definition.go │ │ │ ├── definition_test.go │ │ │ ├── properties.go │ │ │ ├── registry.go │ │ │ ├── schema_validation.go │ │ │ └── transformer.go │ │ └── utils │ │ ├── connector.go │ │ └── connector_test.go ├── domains │ ├── config.go │ └── virtual_references.go ├── generators │ ├── generator.go │ ├── hash.go │ ├── hash_reducer.go │ ├── hybrid.go │ ├── hybrid_test.go │ ├── murmur.go │ ├── projector.go │ ├── random_bytes.go │ ├── random_int64.go │ ├── random_int64_test.go │ ├── siphash.go │ ├── transformers │ │ ├── noise_float64.go │ │ ├── noise_int64.go │ │ ├── noise_int64_test.go │ │ ├── noise_numeric.go │ │ ├── noise_timestamp.go │ │ ├── noise_timestamp_test.go │ │ ├── random_boolean.go │ │ ├── random_choice.go │ │ ├── random_choice_test.go │ │ ├── random_company.go │ │ ├── random_company_test.go │ │ ├── random_float64.go │ │ ├── random_float64_test.go │ │ ├── random_int64.go │ │ ├── random_int64_test.go │ │ ├── random_ip.go │ │ ├── random_ip_test.go │ │ ├── random_mac.go │ │ ├── random_mac_test.go │ │ ├── random_numeric.go │ │ ├── random_numeric_test.go │ │ ├── random_person.go │ │ ├── random_person_test.go │ │ ├── random_string.go │ │ ├── random_string_test.go │ │ ├── random_timestamp.go │ │ ├── random_timestamp_test.go │ │ ├── random_uuid.go │ │ ├── random_uuid_test.go │ │ └── tramsformer.go │ ├── utils.go │ └── utils_test.go ├── storages │ ├── builder │ │ └── builder.go │ ├── directory │ │ ├── config.go │ │ ├── directiry_test.go │ │ └── directory.go │ ├── domains │ │ └── domains.go │ ├── s3 │ │ ├── config.go │ │ ├── logger.go │ │ └── s3.go │ ├── storager.go │ └── utils.go └── utils │ ├── cmd_runner │ └── cmd_runner.go │ ├── config │ ├── mapstructure_hook.go │ └── viper_workaround.go │ ├── context.go │ ├── context_test.go │ ├── dumpstatus │ └── status.go │ ├── ioutils │ ├── count_reader.go │ ├── 
count_writer.go │ ├── gzip_reader.go │ ├── gzip_reader_test.go │ ├── gzip_writer.go │ ├── gzip_writer_test.go │ ├── pipe.go │ └── utils.go │ ├── logger │ └── logger.go │ ├── pgerrors │ └── wrapper.go │ ├── reader │ └── reader.go │ ├── strings │ ├── strings.go │ └── strings_test.go │ └── testutils │ ├── containers.go │ └── storage.go ├── mkdocs.yml ├── pkg └── toolkit │ ├── cmd.go │ ├── column.go │ ├── common.go │ ├── constraints.go │ ├── csv_api.go │ ├── database_schema.go │ ├── definition.go │ ├── driver.go │ ├── dynamic_parameter.go │ ├── dynamic_parameter_test.go │ ├── expr.go │ ├── expt_test.go │ ├── interaction_api.go │ ├── json_api.go │ ├── json_record_with_attr_names_binary.go │ ├── json_record_with_attr_names_text.go │ ├── meta.go │ ├── parameter_definition.go │ ├── parameter_definition_test.go │ ├── parametrizer.go │ ├── raw_record.go │ ├── raw_record_csv.go │ ├── raw_record_str.go │ ├── raw_record_test.go │ ├── raw_record_text.go │ ├── record.go │ ├── record_test.go │ ├── row_driver.go │ ├── static_parameter.go │ ├── static_parameter_context.go │ ├── static_parameter_test.go │ ├── table.go │ ├── template_functions.go │ ├── template_functions_test.go │ ├── template_record_context.go │ ├── tesing_helpers.go │ ├── testutils.go │ ├── testutils │ └── testutils.go │ ├── text_api.go │ ├── transformation_funcs.go │ ├── transformation_funcs_test.go │ ├── transformer.go │ ├── type_cast_functions.go │ ├── type_caster_static_funtions.go │ ├── types.go │ ├── utils.go │ ├── validation_warning.go │ └── values.go ├── playground ├── .pgpass ├── cleanup.sh ├── config.yml └── run.sh ├── requirements.txt └── tests ├── debug_utils └── toc │ └── main.go ├── external_transformer └── test.go └── integration ├── greenmask ├── args.go ├── backward_compatibility_test.go ├── main_test.go └── toc_readwriter_test.go └── storages ├── args.go ├── main_test.go └── s3_test.go /.dockerignore: -------------------------------------------------------------------------------- 1 | docker 2 | 
docs 3 | config.* 4 | docker-compose-* 5 | *.md 6 | .idea 7 | -------------------------------------------------------------------------------- /.github/workflows/checks.yml: -------------------------------------------------------------------------------- 1 | name: PR and main branch checks 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - 'main' 8 | paths-ignore: 9 | - 'README.md' 10 | - 'docs/**' 11 | - 'mkdocs.yml' 12 | - 'LICENSE' 13 | - 'getting_started.md' 14 | - 'docker-compose.yml' 15 | - 'playground/**' 16 | - 'CNAME' 17 | - 'requirements.txt' 18 | pull_request: 19 | paths-ignore: 20 | - 'README.md' 21 | - 'docs/**' 22 | - 'mkdocs.yml' 23 | - 'LICENSE' 24 | - 'getting_started.md' 25 | - 'docker-compose.yml' 26 | - 'playground/**' 27 | - 'CNAME' 28 | - 'requirements.txt' 29 | 30 | env: 31 | go-version: '1.24' 32 | golangci-lint-version: v1.64 33 | 34 | permissions: 35 | contents: read 36 | 37 | jobs: 38 | tests: 39 | uses: ./.github/workflows/tests.yml 40 | 41 | build: 42 | uses: ./.github/workflows/build.yml 43 | needs: 44 | - tests 45 | 46 | code_check: 47 | name: lint 48 | runs-on: ubuntu-latest 49 | needs: 50 | - tests 51 | steps: 52 | - uses: actions/checkout@v4 53 | with: 54 | fetch-depth: 0 55 | 56 | - uses: actions/setup-go@v5 57 | with: 58 | go-version: ${{ env.go-version }} 59 | 60 | - uses: actions/cache/restore@v4 61 | id: restore-cache 62 | with: 63 | path: coverage.out 64 | key: ${{ runner.os }}-coverage-${{ github.sha }} 65 | 66 | - name: golangci-lint 67 | uses: golangci/golangci-lint-action@v6 68 | with: 69 | version: ${{ env.golangci-lint-version }} 70 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy documentation 2 | 3 | on: 4 | workflow_call: 5 | 6 | env: 7 | python-version: 'pypy3.10' 8 | 9 | jobs: 10 | deploy-docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - 
name: Checkout repository 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Setup python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ env.python-version }} 22 | 23 | - name: Install dependencies 24 | run: pip install -r requirements.txt 25 | 26 | - name: Setup docs deploy 27 | run: | 28 | git config --global user.name "Greenmask CI" 29 | git config --global user.email ci@greenmask.io 30 | 31 | - name: Build dev docs 32 | if: github.ref == 'refs/heads/main' 33 | run: mike deploy --push dev 34 | 35 | - name: Get Git tag 36 | if: startsWith(github.ref, 'refs/tags/v') 37 | run: echo "TAG=$(git tag --points-at HEAD)" >> $GITHUB_ENV 38 | 39 | - name: Build release docs 40 | if: startsWith(github.ref, 'refs/tags/v') 41 | run: | 42 | if [[ "${{ env.TAG }}" == *"rc"* || "${{ env.TAG }}" == *"dev"* || "${{ env.TAG }}" == *"pre"* || "${{ env.TAG }}" == *"beta"* || "${{ env.TAG }}" == *"b"* ]]; then 43 | mike deploy --push ${{ env.TAG }} 44 | else 45 | mike deploy --push --update-aliases ${{ env.TAG }} latest 46 | mike set-default --push latest 47 | fi 48 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create Greenmask release 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - 'v*' 8 | 9 | jobs: 10 | tests: 11 | uses: ./.github/workflows/tests.yml 12 | 13 | build: 14 | uses: ./.github/workflows/build.yml 15 | needs: 16 | - tests 17 | secrets: inherit 18 | 19 | docs: 20 | uses: ./.github/workflows/docs.yml 21 | needs: 22 | - build 23 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run unit and integration tests 2 | 3 | on: 4 | workflow_call: 5 | 6 | env: 7 | go-version: '1.24' 8 | 9 | jobs: 10 | tests: 11 
| runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v4 15 | 16 | - name: Setup Go 17 | uses: actions/setup-go@v5 18 | with: 19 | go-version: ${{ env.go-version }} 20 | 21 | - name: Set up QEMU 22 | uses: docker/setup-qemu-action@v3 23 | 24 | - name: Set up Docker Buildx 25 | uses: docker/setup-buildx-action@v3 26 | 27 | - name: Run unit tests 28 | run: make coverage 29 | 30 | - uses: actions/cache/save@v4 31 | id: coverage_file_cache 32 | with: 33 | path: coverage.out 34 | key: ${{ runner.os }}-coverage-${{ github.sha }} 35 | 36 | - name: Run integration tests 37 | run: | 38 | docker compose -f docker-compose-integration.yml -p greenmask up \ 39 | --renew-anon-volumes --force-recreate --build --exit-code-from greenmask \ 40 | --abort-on-container-exit greenmask 41 | -------------------------------------------------------------------------------- /.github/workflows/update_dev_docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy development documentation (from main branch) 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - 'main' 8 | paths: 9 | - 'docs/**' 10 | 11 | jobs: 12 | docs: 13 | uses: ./.github/workflows/docs.yml 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | vendor/ 16 | 17 | # IDEs directories 18 | .idea 19 | .vscode 20 | scratch 21 | config.yml 22 | !playground/config.yml 23 | 24 | # Project exclusion 25 | site 26 | venv 27 | .cache 28 | # Binaries 29 | cmd/greenmask/greenmask 30 | 
-------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | docs.greenmask.io -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MAIN_PATH := ./cmd/greenmask/ 2 | CMD_NAME := greenmask 3 | CMD_FILES = $(wildcard *.go) 4 | TEST_FILES = $(wildcard *.go) 5 | COVERAGE_FILE := coverage.out 6 | VERSION ?= $(shell git tag --points-at HEAD) 7 | LDFLAGS ?= -X github.com/greenmaskio/greenmask/cmd/greenmask/cmd.Version=$(VERSION) 8 | 9 | .PHONY: build 10 | 11 | tests: unittest 12 | 13 | unittest: 14 | go list ./... | grep -E 'internal|pkg' | xargs go test -v 15 | 16 | coverage: 17 | go list ./... | grep -E 'internal|pkg' | xargs go test -v -coverprofile=$(COVERAGE_FILE) | grep -v 'no test files' 18 | go tool cover -html=$(COVERAGE_FILE) 19 | 20 | install: 21 | mv $(MAIN_PATH)/$(CMD_NAME) $(GOBIN)/$(CMD_NAME) 22 | 23 | # The build flag -tags=viper_bind_struct has been added to avoid the need to bind each of the environment variables 24 | build: $(CMD_FILES) 25 | CGO_ENABLED=0 go build -tags=viper_bind_struct -ldflags="$(LDFLAGS)" -v -o $(CMD_NAME) $(MAIN_PATH) 26 | 27 | lint: 28 | golangci-lint run ./... 
29 | 30 | up: 31 | docker-compose up playground-dbs-filler 32 | -------------------------------------------------------------------------------- /cmd/greenmask/cmd/delete/domains.go: -------------------------------------------------------------------------------- 1 | package delete 2 | 3 | import "time" 4 | 5 | type StorageResponse struct { 6 | Valid []*Dump 7 | Failed []*Dump 8 | UnknownOrFailed []*Dump 9 | } 10 | 11 | type Dump struct { 12 | DumpId string 13 | Date time.Time 14 | Status string 15 | Database string 16 | } 17 | -------------------------------------------------------------------------------- /cmd/greenmask/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package main 16 | 17 | import ( 18 | "github.com/rs/zerolog/log" 19 | 20 | "github.com/greenmaskio/greenmask/cmd/greenmask/cmd" 21 | ) 22 | 23 | func main() { 24 | if err := cmd.Execute(); err != nil { 25 | log.Fatal().Err(err).Msg("") 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /docker/greenmask/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.24 2 | ARG DEBIAN_RELEASE=bookworm 3 | 4 | FROM --platform=$BUILDPLATFORM golang:${GO_VERSION}-${DEBIAN_RELEASE} AS build 5 | 6 | ARG TARGETOS 7 | ARG TARGETARCH 8 | ARG TARGETVARIANT 9 | 10 | WORKDIR /var/lib/greenmask 11 | 12 | COPY . . 13 | 14 | RUN GOOS=${TARGETOS} GOARCH=${TARGETARCH} GOARM=$(echo ${TARGETVARIANT} | cut -d 'v' -f 2) make build 15 | 16 | FROM debian:${DEBIAN_RELEASE}-slim 17 | 18 | ENV DEBIAN_FRONTEND=noninteractive 19 | 20 | RUN apt-get update \ 21 | && apt-get install -y wget gnupg2 bash-completion \ 22 | && echo "deb https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list \ 23 | && wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \ 24 | && apt-get update \ 25 | && apt-get install --no-install-recommends --no-install-suggests -y \ 26 | postgresql-client-17 \ 27 | postgresql-client-16 \ 28 | postgresql-client-15 \ 29 | postgresql-client-14 \ 30 | postgresql-client-13 \ 31 | postgresql-client-12 \ 32 | postgresql-client-11 \ 33 | && rm -rf /var/lib/apt/lists/* 34 | 35 | COPY --from=build /var/lib/greenmask/greenmask /usr/bin 36 | 37 | RUN mkdir /home/greenmask \ 38 | && groupadd -g 10001 greenmask \ 39 | && useradd -u 10000 -g greenmask greenmask \ 40 | && chown -R greenmask:greenmask /home/greenmask 41 | 42 | USER greenmask:greenmask 43 | 44 | RUN mkdir ~/.bash_completions \ 45 | && greenmask completion bash > ~/.bash_completions/greenmask.bash \ 46 | && echo 'source /etc/bash_completion' >> 
~/.bashrc \ 47 | && echo 'source ~/.bash_completions/greenmask.bash' >> ~/.bashrc 48 | 49 | WORKDIR /home/greenmask 50 | 51 | ENTRYPOINT ["greenmask"] 52 | -------------------------------------------------------------------------------- /docker/integration/filldb/.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | -------------------------------------------------------------------------------- /docker/integration/filldb/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:latest 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | ENV PGPASSWORD=example 6 | ENV FILE_DUMP="demo-big-en.zip" 7 | ENV TMP_DIR=/tmp/schema 8 | 9 | RUN apt-get update && apt-get install -y wget && mkdir /tmp/schema 10 | 11 | COPY . / 12 | 13 | RUN chmod +x ./filldb.sh 14 | 15 | CMD ["./filldb.sh"] 16 | -------------------------------------------------------------------------------- /docker/integration/filldb/filldb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023 Greenmask 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | cd $TMP_DIR 17 | if [ ! 
-f $FILE_DUMP ]; then 18 | echo "Downloading dump file" 19 | wget https://edu.postgrespro.com/$FILE_DUMP 20 | fi 21 | IFS="," read -ra PG_VERSIONS_CHECK <<< "${PG_VERSIONS_CHECK}" 22 | for pgver in ${PG_VERSIONS_CHECK[@]}; do 23 | echo "Restoring database for PostgreSQL $pgver" 24 | if psql -p 5432 -h db-$pgver -U postgres -c 'CREATE DATABASE demo;'; then 25 | psql -p 5432 -h db-$pgver -U postgres -c 'DROP DATABASE demo_restore;' 26 | psql -p 5432 -h db-$pgver -U postgres -c 'CREATE DATABASE demo_restore;' 27 | gzip -dc $FILE_DUMP | psql -p 5432 -h db-$pgver -U postgres -d demo 28 | if [ $pgver -ne '11' ]; then 29 | psql -p 5432 -h db-$pgver -U postgres -d demo -f /generated.sql 30 | fi 31 | fi 32 | done 33 | -------------------------------------------------------------------------------- /docker/integration/filldb/generated.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE public.people 2 | ( 3 | id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY, 4 | generated text GENERATED ALWAYS AS (id || first_name) STORED, 5 | first_name text 6 | ); 7 | 8 | INSERT INTO public.people("first_name") 9 | VALUES ('bob'); 10 | -------------------------------------------------------------------------------- /docker/playground/filldb/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:latest 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | ENV PGPASSWORD=example 6 | ENV TMP_DIR=/tmp/schema 7 | ENV DEMODB_URL="https://github.com/morenoh149/postgresDBSamples" 8 | ENV ORIGINAL_DB_NAME="original" 9 | ENV TRANSFORMED_DB_NAME="transformed" 10 | ENV DBHOST="db" 11 | ENV DBUSER="postgres" 12 | ENV DBPASSWORD="example" 13 | 14 | RUN apt-get update && apt-get install -y wget git && mkdir /tmp/schema 15 | 16 | RUN cd $TMP_DIR && git clone $DEMODB_URL && cd $TMP_DIR/postgresDBSamples/adventureworks 17 | 18 | COPY filldb.sh /filldb.sh 19 | 20 | RUN chmod +x ./filldb.sh 21 | 22 | CMD 
["./filldb.sh"] 23 | -------------------------------------------------------------------------------- /docker/playground/filldb/filldb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023 Greenmask 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | cd $TMP_DIR/postgresDBSamples/adventureworks || exit 17 | 18 | if ! psql -lqt -p 5432 -h playground-db -U postgres | cut -d \| -f 1 | grep -qw $ORIGINAL_DB_NAME; then 19 | psql -p 5432 -h playground-db -U postgres -c "CREATE DATABASE $ORIGINAL_DB_NAME;" 20 | psql -p 5432 -h playground-db -U postgres -d $ORIGINAL_DB_NAME < install.sql 21 | else 22 | echo "database \"$ORIGINAL_DB_NAME\" has been already created: skipping" 23 | fi 24 | 25 | if ! 
psql -lqt -p 5432 -h playground-db -U postgres | cut -d \| -f 1 | grep -qw $TRANSFORMED_DB_NAME; then 26 | psql -p 5432 -h playground-db -U postgres -c "CREATE DATABASE $TRANSFORMED_DB_NAME;" 27 | else 28 | echo "database \"$TRANSFORMED_DB_NAME\" has been already created: skipping" 29 | fi 30 | -------------------------------------------------------------------------------- /docs/assets/built_in_transformers/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/built_in_transformers/img.png -------------------------------------------------------------------------------- /docs/assets/built_in_transformers/orders-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/built_in_transformers/orders-schema.png -------------------------------------------------------------------------------- /docs/assets/built_in_transformers/person-person-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/built_in_transformers/person-person-schema.png -------------------------------------------------------------------------------- /docs/assets/list_dumps_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/list_dumps_screen.png -------------------------------------------------------------------------------- /docs/assets/list_transformers_screen_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/list_transformers_screen_1.png -------------------------------------------------------------------------------- /docs/assets/list_transformers_screen_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/list_transformers_screen_2.png -------------------------------------------------------------------------------- /docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/logo.png -------------------------------------------------------------------------------- /docs/assets/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/schema.png -------------------------------------------------------------------------------- /docs/assets/show_transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/show_transformer.png -------------------------------------------------------------------------------- /docs/assets/validate_horizontal_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/validate_horizontal_diff.png -------------------------------------------------------------------------------- /docs/assets/validate_vertical_diff.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GreenmaskIO/greenmask/cede356819f753633abc1782ce23d35bb45e4227/docs/assets/validate_vertical_diff.png -------------------------------------------------------------------------------- /docs/built_in_transformers/advanced_transformers/custom_functions/index.md: -------------------------------------------------------------------------------- 1 | # Template custom functions 2 | 3 | Within Greenmask, custom functions play a crucial role, providing a wide array of options for implementing diverse 4 | logic. Under the hood, the custom functions are based on 5 | the [Sprig template functions for Go](https://masterminds.github.io/sprig/). Greenmask enhances this capability by 6 | introducing additional functions and transformation functions. These extensions mirror the logic found in 7 | the [standard transformers](../../standard_transformers/index.md) but offer you the flexibility to implement intricate 8 | and comprehensive logic tailored to your specific needs. 9 | 10 | Currently, you can use template custom functions for the [advanced transformers](../index.md): 11 | 12 | * [Json](../json.md) 13 | * [Template](../template.md) 14 | * [TemplateRecord](../template_record.md) 15 | 16 | and for the [Transformation condition feature](../../transformation_condition.md) as well. 17 | 18 | Custom functions are arbitrarily divided into 2 groups: 19 | 20 | - [Core functions](core_functions.md) — custom functions that vary in purpose and include PostgreSQL driver, JSON 21 | output, testing, and transformation functions. 22 | - [Faker functions](faker_function.md) — custom functions of the *faker* type that generate synthetic data. 
23 | -------------------------------------------------------------------------------- /docs/built_in_transformers/advanced_transformers/index.md: -------------------------------------------------------------------------------- 1 | # Advanced transformers 2 | 3 | Advanced transformers are modifiable anonymization methods that users can adjust based on their needs by using [custom functions](custom_functions/index.md). 4 | 5 | Below you can find an index of all advanced transformers currently available in Greenmask. 6 | 7 | 1. [Json](json.md) — changes JSON content by using `delete` and `set` operations. 8 | 2. [Template](template.md) — executes a Go template of your choice and applies the result to a specified column. 9 | 3. [TemplateRecord](template_record.md) — modifies records by using a Go template of your choice and applies the changes via the PostgreSQL 10 | driver. 11 | -------------------------------------------------------------------------------- /docs/built_in_transformers/index.md: -------------------------------------------------------------------------------- 1 | # About transformers 2 | 3 | Transformers in Greenmask are methods which are applied to anonymize sensitive data. All Greenmask transformers are 4 | split into the following groups: 5 | 6 | - [Dynamic parameters](dynamic_parameters.md) — transformers that require an input of parameters and generate 7 | random data based on them. 8 | - [Transformation engines](transformation_engines.md) — the type of generator used in transformers. Hash (deterministic) 9 | and random (randomization). 10 | - [Parameters templating](parameters_templating.md) — generates static parameter values from templates. 11 | - [Transformation conditions](transformation_condition.md) — conditions that can be applied to transformers. If the 12 | condition is not met, the transformer will not be applied.
13 | - [Transformation Inheritance](transformation_inheritance.md) — transformation inheritance for partitioned tables and 14 | tables with foreign keys. Define once and apply to all. 15 | - [Standard transformers](standard_transformers/index.md) — transformers that require only an input of parameters. 16 | - [Advanced transformers](advanced_transformers/index.md) — transformers that can be modified according to user's needs 17 | with the help of [custom functions](advanced_transformers/custom_functions/index.md). 18 | - Custom transformers — coming soon... 19 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_bool.md: -------------------------------------------------------------------------------- 1 | Generate random boolean values. 2 | 3 | ## Parameters 4 | 5 | | Name | Description | Default | Required | Supported DB types | 6 | |-----------|-----------------------------------------------------------------------------------------------------|----------|----------|--------------------| 7 | | column | The name of the column to be affected | | Yes | bool | 8 | | keep_null | Indicates whether NULL values should be replaced with transformed values or not | `true` | No | - | 9 | | engine | The engine used for generating the values [`random`, `hash`]. Use hash for deterministic generation | `random` | No | - | 10 | 11 | ## Description 12 | 13 | The `RandomBool` transformer generates a random boolean value. The behaviour for NULL values can be 14 | configured using the `keep_null` parameter. The `engine` parameter allows you to choose between random and hash engines 15 | for generating values. Read more about the engines in the [Transformation engines](../transformation_engines.md) 16 | section. 17 | 18 | ## Example: Generate a random boolean for a column 19 | 20 | In the following example, the `RandomBool` transformer generates a random boolean value for the `salariedflag` column. 
21 | 22 | ``` yaml title="RandomBool transformer example" 23 | - schema: "humanresources" 24 | name: "employee" 25 | transformers: 26 | - name: "RandomBool" 27 | params: 28 | column: "salariedflag" 29 | ``` 30 | 31 | Result 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
| Column       | OriginalValue | TransformedValue |
|--------------|---------------|------------------|
| salariedflag | t             | f                |
41 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_latitude.md: -------------------------------------------------------------------------------- 1 | The `RandomLatitude` transformer generates random latitude values for specified database columns. It is designed to support geographical data enhancements, particularly useful for applications requiring randomized but plausible geographical coordinates. 2 | 3 | ## Parameters 4 | 5 | | Name | Description | Default | Required | Supported DB types | 6 | |-----------|------------------------------------------------------|---------|----------|--------------------| 7 | | column | The name of the column to be affected | | Yes | float4, float8, numeric | 8 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 9 | 10 | ## Description 11 | 12 | The `RandomLatitude` transformer utilizes the `faker` library to produce random latitude values within the range of -90 to +90 degrees. This transformer can be applied to columns designated to store geographical latitude information, enhancing data sets with randomized latitude coordinates. 13 | 14 | ## Example: Populate random latitude for the `locations` table 15 | 16 | This example demonstrates configuring the `RandomLatitude` transformer to populate the `latitude` column in the `locations` table with random latitude values. 17 | 18 | ```yaml title="RandomLatitude transformer example" 19 | - schema: "public" 20 | name: "locations" 21 | transformers: 22 | - name: "RandomLatitude" 23 | params: 24 | column: "latitude" 25 | keep_null: false 26 | ``` 27 | 28 | With this configuration, the `latitude` column will be filled with random latitude values, replacing any existing non-NULL values. If `keep_null` is set to `true`, existing NULL values will be preserved. 
29 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_longitude.md: -------------------------------------------------------------------------------- 1 | The `RandomLongitude` transformer is designed to generate random longitude values for specified database columns, enhancing datasets with realistic geographic coordinates suitable for a wide range of applications, from testing location-based services to anonymizing real geographic data. 2 | 3 | ## Parameters 4 | 5 | | Name | Description | Default | Required | Supported DB types | 6 | |-----------|------------------------------------------------------|---------|----------|--------------------| 7 | | column | The name of the column to be affected | | Yes | float4, float8, numeric | 8 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 9 | 10 | ## Description 11 | 12 | The `RandomLongitude` transformer leverages the `faker` library to produce random longitude values within the globally accepted range of -180 to +180 degrees. This flexibility allows the transformer to be applied to any column intended for storing longitude data, providing a simple yet powerful tool for introducing randomized longitude coordinates into a database. 13 | 14 | ## Example: Populate random longitude for the `locations` table 15 | 16 | This example shows how to use the `RandomLongitude` transformer to fill the `longitude` column in the `locations` table with random longitude values. 17 | 18 | ```yaml title="RandomLongitude transformer example" 19 | - schema: "public" 20 | name: "locations" 21 | transformers: 22 | - name: "RandomLongitude" 23 | params: 24 | column: "longitude" 25 | keep_null: false 26 | ``` 27 | 28 | This setup ensures that all entries in the `longitude` column receive a random longitude value, replacing any existing non-NULL values. 
If `keep_null` is set to `true`, then existing NULL values in the column will remain unchanged. 29 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_month_name.md: -------------------------------------------------------------------------------- 1 | The `RandomMonthName` transformer is crafted to populate specified database columns with random month names. This 2 | transformer is especially useful for scenarios requiring the simulation of time-related data, such as user birth months 3 | or event months, without relying on specific date values. 4 | 5 | ## Parameters 6 | 7 | | Name | Description | Default | Required | Supported DB types | 8 | |-----------|---------------------------------------------------|---------|----------|-------------------------------------| 9 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext | 10 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 11 | 12 | ## Description 13 | 14 | The `RandomMonthName` transformer utilizes the `faker` library to generate the names of months at random. It can be 15 | applied to any textual column in a database to introduce variety and realism into data sets that require representations 16 | of months without the need for specific calendar dates. 17 | 18 | ## Example: Populate random month names for the `user_profiles` table 19 | 20 | This example demonstrates how to configure the `RandomMonthName` transformer to fill the `birth_month` column in the 21 | `user_profiles` table with random month names, adding a layer of diversity to user data without using actual birthdates. 
22 | 23 | ```yaml title="RandomMonthName transformer example" 24 | - schema: "public" 25 | name: "user_profiles" 26 | transformers: 27 | - name: "RandomMonthName" 28 | params: 29 | column: "birth_month" 30 | keep_null: false 31 | ``` 32 | 33 | With this setup, the `birth_month` column will be updated with random month names, replacing any existing non-NULL 34 | values. If the `keep_null` parameter is set to `true`, then existing NULL values within the column will remain 35 | untouched. 36 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_password.md: -------------------------------------------------------------------------------- 1 | The `RandomPassword` transformer is designed to populate specified database columns with random passwords. This utility 2 | is vital for applications that require the simulation of secure user data, testing systems with authentication 3 | mechanisms, or anonymizing real passwords in datasets. 4 | 5 | ## Parameters 6 | 7 | | Name | Description | Default | Required | Supported DB types | 8 | |-----------|---------------------------------------------------|---------|----------|-------------------------------------| 9 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext | 10 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 11 | 12 | ## Description 13 | 14 | Employing sophisticated password generation algorithms or libraries, the `RandomPassword` transformer injects random 15 | passwords into the designated database column. This feature is particularly useful for creating realistic and secure 16 | user password datasets for development, testing, or demonstration purposes. 
17 | 18 | ## Example: Populate random passwords for the `user_accounts` table 19 | 20 | This example demonstrates how to configure the `RandomPassword` transformer to populate the `password` column in the 21 | `user_accounts` table with random passwords. 22 | 23 | ```yaml title="RandomPassword transformer example" 24 | - schema: "public" 25 | name: "user_accounts" 26 | transformers: 27 | - name: "RandomPassword" 28 | params: 29 | column: "password" 30 | keep_null: false 31 | ``` 32 | 33 | In this configuration, every entry in the `password` column will be updated with a random password. Setting the 34 | `keep_null` parameter to `true` will preserve existing NULL values in the column, accommodating scenarios where password 35 | data may not be applicable. 36 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_sentence.md: -------------------------------------------------------------------------------- 1 | The `RandomSentence` transformer is designed to populate specified database columns with random sentences. Ideal for 2 | simulating natural language text for user comments, testing NLP systems, or anonymizing textual data in databases. 3 | 4 | ## Parameters 5 | 6 | | Name | Description | Default | Required | Supported DB types | 7 | |-----------|---------------------------------------------------|---------|----------|-------------------------------------| 8 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext | 9 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 10 | 11 | ## Description 12 | 13 | The `RandomSentence` transformer employs complex text generation algorithms or libraries to generate random sentences, 14 | injecting them into a designated database column without the need for specifying sentence length. 
This flexibility 15 | ensures the creation of varied and plausible text for a wide range of applications. 16 | 17 | ## Example: Populate random sentences for the `comments` table 18 | 19 | This example shows how to configure the `RandomSentence` transformer to populate the `comment` column in the `comments` 20 | table with random sentences. It is a straightforward method for simulating diverse user-generated content. 21 | 22 | ```yaml title="RandomSentence transformer example" 23 | - schema: "public" 24 | name: "comments" 25 | transformers: 26 | - name: "RandomSentence" 27 | params: 28 | column: "comment" 29 | keep_null: false 30 | ``` 31 | 32 | In this configuration, the `comment` column will be updated with random sentences for each entry, replacing any existing 33 | non-NULL values. If `keep_null` is set to `true`, existing NULL values in the column will be preserved, maintaining the 34 | integrity of records where comments are not applicable. 35 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_url.md: -------------------------------------------------------------------------------- 1 | The `RandomURL` transformer is designed to populate specified database columns with random URL (Uniform Resource 2 | Locator) addresses. This tool is highly beneficial for simulating web content, testing applications that require URL 3 | input, or anonymizing real web addresses in datasets. 
4 | 5 | ## Parameters 6 | 7 | | Name | Description | Default | Required | Supported DB types | 8 | |-----------|---------------------------------------------------|---------|----------|-------------------------------------| 9 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext | 10 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 11 | 12 | ## Description 13 | 14 | Utilizing advanced algorithms or libraries for generating URL strings, the `RandomURL` transformer injects random, 15 | plausible URLs into the designated database column. Each generated URL is structured to include the protocol 16 | (e.g., "http://", "https://"), domain name, and path, offering a realistic range of web addresses for various applications. 17 | 18 | ## Example: Populate random URLs for the `webpages` table 19 | 20 | This example illustrates how to configure the `RandomURL` transformer to populate the `page_url` column in a `webpages` 21 | table with random URLs, providing a broad spectrum of web addresses for testing or data simulation purposes. 22 | 23 | ```yaml title="RandomURL transformer example" 24 | - schema: "public" 25 | name: "webpages" 26 | transformers: 27 | - name: "RandomURL" 28 | params: 29 | column: "page_url" 30 | keep_null: false 31 | ``` 32 | 33 | With this configuration, the `page_url` column will be filled with random URLs for each entry, replacing any existing 34 | non-NULL values. Setting the `keep_null` parameter to `true` allows for the preservation of existing NULL values within 35 | the column, accommodating scenarios where URL data may be intentionally omitted. 36 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_uuid.md: -------------------------------------------------------------------------------- 1 | Generate a random universally unique identifier (UUID) using version 4.
2 | 3 | ## Parameters 4 | 5 | | Name | Description | Default | Required | Supported DB types | 6 | |-----------|-----------------------------------------------------------------------------------------------------|----------|----------|-------------------------------------------| 7 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext, uuid | 8 | | keep_null | Indicates whether NULL values should be replaced with transformed values or not | `true` | No | - | 9 | | engine | The engine used for generating the values [`random`, `hash`]. Use hash for deterministic generation | `random` | No | - | 10 | 11 | ## Description 12 | 13 | The `RandomUuid` transformer generates a random UUID. The behaviour for NULL values can be configured using 14 | the `keep_null` parameter. 15 | 16 | The `engine` parameter allows you to choose between random and hash engines for generating values. Read more about the 17 | engines in the [Transformation engines](../transformation_engines.md) section. 18 | 19 | ## Example: Updating the `rowguid` column 20 | 21 | The following example replaces the original UUID values of the `rowguid` column with randomly generated ones. 22 | 23 | ``` yaml title="RandomUuid transformer example" 24 | - schema: "humanresources" 25 | name: "employee" 26 | transformers: 27 | - name: "RandomUuid" 28 | params: 29 | column: "rowguid" 30 | keep_null: false 31 | ``` 32 | 33 | Result 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 |
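| Column  | OriginalValue                        | TransformedValue                     |
|---------|--------------------------------------|--------------------------------------|
| rowguid | f01251e5-96a3-448d-981e-0f99d789110d | 8ed8c4b2-7e7a-1e8d-f0f0-768e0e8ed0d0 |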
43 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_word.md: -------------------------------------------------------------------------------- 1 | The `RandomWord` transformer populates specified database columns with random words. Ideal for simulating textual 2 | content, enhancing linguistic datasets, or anonymizing text in databases. 3 | 4 | ## Parameters 5 | 6 | | Name | Description | Default | Required | Supported DB types | 7 | |-----------|---------------------------------------------------|---------|----------|-------------------------------------| 8 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext | 9 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 10 | 11 | ## Description 12 | 13 | The `RandomWord` transformer employs a mechanism to inject random words into a designated database column, supporting 14 | the generation of linguistically plausible and contextually diverse text. This transformer is particularly beneficial 15 | for creating rich text datasets for development, testing, or educational purposes without specifying the language, 16 | focusing on versatility and ease of use. 17 | 18 | ## Example: Populate random words for the `content` table 19 | 20 | This example demonstrates configuring the `RandomWord` transformer to populate the `tag` column in the `content` table 21 | with random words. It is a straightforward approach to adding varied textual data for tagging or content categorization. 22 | 23 | ```yaml title="RandomWord transformer example" 24 | - schema: "public" 25 | name: "content" 26 | transformers: 27 | - name: "RandomWord" 28 | params: 29 | column: "tag" 30 | keep_null: false 31 | ``` 32 | 33 | In this setup, the `tag` column will be updated with random words for each entry, replacing any existing non-NULL 34 | values. 
If `keep_null` is set to `true`, existing NULL values in the column will remain unchanged, maintaining data 35 | integrity for records where textual data is not applicable. 36 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/random_year_string.md: -------------------------------------------------------------------------------- 1 | The `RandomYearString` transformer is designed to populate specified database columns with random year strings. It is 2 | ideal for scenarios that require the representation of years without specific dates, such as manufacturing years of 3 | products, birth years of users, or any other context where only the year is relevant. 4 | 5 | ## Parameters 6 | 7 | | Name | Description | Default | Required | Supported DB types | 8 | |-----------|---------------------------------------------------|---------|----------|----------------------------------------------------------------| 9 | | column | The name of the column to be affected | | Yes | text, varchar, char, bpchar, citext, int2, int4, int8, numeric | 10 | | keep_null | Indicates whether NULL values should be preserved | `false` | No | - | 11 | 12 | ## Description 13 | 14 | The `RandomYearString` transformer leverages the `faker` library to generate strings representing random years. This 15 | allows for the easy generation of year data in a string format, adding versatility and realism to datasets that need to 16 | simulate or anonymize year-related information. 17 | 18 | ## Example: Populate random year strings for the `products` table 19 | 20 | This example shows how to use the `RandomYearString` transformer to fill the `manufacturing_year` column in the 21 | `products` table with random year strings, simulating the diversity of manufacturing dates. 
22 | 23 | ```yaml title="RandomYearString transformer example" 24 | - schema: "public" 25 | name: "products" 26 | transformers: 27 | - name: "RandomYearString" 28 | params: 29 | column: "manufacturing_year" 30 | keep_null: false 31 | ``` 32 | 33 | In this configuration, the `manufacturing_year` column will be populated with random year strings, replacing any 34 | existing non-NULL values. If `keep_null` is set to `true`, then existing NULL values in the column will be preserved. 35 | -------------------------------------------------------------------------------- /docs/built_in_transformers/standard_transformers/set_null.md: -------------------------------------------------------------------------------- 1 | Set `NULL` value to a column. 2 | 3 | ## Parameters 4 | 5 | | Name | Description | Default | Required | Supported DB types | 6 | |--------|-----------------------------------------------------|---------|----------|--------------------| 7 | | column | The name of the column to be affected | | Yes | any | 8 | 9 | ## Description 10 | 11 | The `SetNull` transformer assigns a `NULL` value to a column. This transformer generates a warning if the affected column has a `NOT NULL` constraint.
12 | 13 | ```json title="NULL constraint violation warning" 14 | { 15 | "hash": "5a229ee964a4ba674a41a4d63dab5a8c", 16 | "meta": { 17 | "ColumnName": "jobtitle", 18 | "ConstraintType": "NotNull", 19 | "ParameterName": "column", 20 | "SchemaName": "humanresources", 21 | "TableName": "employee", 22 | "TransformerName": "SetNull" 23 | }, 24 | "msg": "transformer may produce NULL values but column has NOT NULL constraint", 25 | "severity": "warning" 26 | } 27 | ``` 28 | 29 | ## Example: Set NULL value to the `jobtitle` column 30 | 31 | ``` yaml title="SetNull transformer example" 32 | - schema: "humanresources" 33 | name: "employee" 34 | transformers: 35 | - name: "SetNull" 36 | params: 37 | column: "jobtitle" 38 | ``` 39 | 40 | ```bash title="Expected result" 41 | 42 | | column name | original value | transformed | 43 | |-------------|-------------------------|-------------| 44 | | jobtitle | Chief Executive Officer | NULL | 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/commands/delete.md: -------------------------------------------------------------------------------- 1 | # delete command 2 | 3 | Delete a dump with a specific ID from the storage 4 | 5 | 6 | ```text title="Supported flags" 7 | Usage: 8 | greenmask delete [flags] [dumpId] 9 | 10 | Flags: 11 | --before-date string delete dumps older than the specified date in RFC3339Nano format: 2021-01-01T00:00.0:00Z 12 | --dry-run do not delete anything, just show what would be deleted 13 | --prune-failed prune failed dumps 14 | --prune-unsafe prune dumps with "unknown-or-failed" statuses.
Works only with --prune-failed 15 | --retain-for string retain dumps for the specified duration in format: 1w2d3h4m5s6ms7us8ns 16 | --retain-recent int retain the most recent N completed dumps (default -1) 17 | ``` 18 | 19 | ```shell title="delete dump by id" 20 | greenmask --config config.yml delete 1723643249862 21 | ``` 22 | 23 | ```shell title="delete dumps older than the specified date" 24 | greenmask --config config.yml delete --before-date 2021-01-01T00:00:00.0Z --dry-run 25 | ``` 26 | 27 | ```shell title="prune failed dumps" 28 | greenmask --config config.yml delete --prune-failed --dry-run 29 | ``` 30 | 31 | ```shell title="prune dumps with 'unknown-or-failed' statuses" 32 | greenmask --config config.yml delete --prune-failed --prune-unsafe --dry-run 33 | ``` 34 | 35 | ```shell title="retain dumps for the specified duration" 36 | greenmask --config config.yml delete --retain-for 1w5d --dry-run 37 | ``` 38 | 39 | ```shell title="retain the most recent N completed dumps" 40 | greenmask --config config.yml delete --retain-recent 5 --dry-run 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/commands/index.md: -------------------------------------------------------------------------------- 1 | # Commands 2 | 3 | ## Introduction 4 | 5 | ```shell title="Greenmask available commands" 6 | greenmask \ 7 | --log-format=[json|text] \ 8 | --log-level=[debug|info|warn] \ 9 | --config=config.yml \ 10 | [dump|list-dumps|delete|list-transformers|show-transformer|validate|restore|show-dump] 11 | ``` 12 | 13 | You can use the following commands within Greenmask: 14 | 15 | * [list-transformers](list-transformers.md) — displays a list of available transformers along with their documentation 16 | * [show-transformer](show-transformer.md) — displays information about the specified transformer 17 | * [validate](validate.md) — performs a validation procedure by testing config, comparing transformed data, identifying 18 | potential issues,
and checking for schema changes. 19 | * [dump](dump.md) — initiates the data dumping process 20 | * [restore](restore.md) — restores data to the target database either by specifying a `dumpId` or using the latest available dump 21 | * [list-dumps](list-dumps.md) — lists all available dumps stored in the system 22 | * [show-dump](show-dump.md) — provides metadata information about a particular dump, offering insights into its structure and 23 | attributes 24 | * [delete](delete.md) — deletes a specific dump from the storage 25 | 26 | 27 | For any of the commands mentioned above, you can include the following common flags: 28 | 29 | * `--log-format` — specifies the desired format for log output, which can be either `json` or `text`. This parameter is 30 | optional, with the default format set to `text`. 31 | * `--log-level` — sets the desired level for log output, which can be one of `debug`, `info`, or `warn`. This parameter 32 | is optional, with the default log level being `info`. 33 | * `--config` — requires the specification of a configuration file in YAML format. This configuration file is mandatory 34 | for Greenmask to operate correctly. 35 | * `--help` — displays comprehensive help information for Greenmask, providing guidance on its usage and available 36 | commands. 37 | -------------------------------------------------------------------------------- /docs/commands/list-dumps.md: -------------------------------------------------------------------------------- 1 | ## list-dumps command 2 | 3 | The `list-dumps` command provides a list of all dumps stored in the storage.
The list includes the following attributes: 4 | 5 | * `ID` — the unique identifier of the dump, used for operations like `restore`, `delete`, and `show-dump` 6 | * `DATE` — the date when the snapshot was created 7 | * `DATABASE` — the name of the database associated with the dump 8 | * `SIZE` — the original size of the dump 9 | * `COMPRESSED SIZE` — the size of the dump after compression 10 | * `DURATION` — the duration of the dump procedure 11 | * `TRANSFORMED` — indicates whether the dump has been transformed 12 | * `STATUS` — the status of the dump, which can be one of the following: 13 | * `done` — the dump was completed successfully 14 | * `in progress` — the dump is currently being created 15 | * `failed` — the dump creation process failed 16 | * `unknown or failed` — the deprecated status of the dump that is used for failed dumps or dumps in progress for 17 | version v0.1.14 and earlier 18 | 19 | Example of `list-dumps` output: 20 | ![list_dumps_screen.png](../assets/list_dumps_screen.png) 21 | 22 | !!! info 23 | 24 | Greenmask uses a heartbeat mechanism to determine the status of a dump. 25 | A dump is considered `failed` if it lacks a "done" heartbeat or if the last heartbeat timestamp exceeds 30 minutes. 26 | Heartbeats are recorded every 15 minutes by the `dump` command while it is in progress. If `greenmask` fails unexpectedly, 27 | the heartbeat stops being updated, and after 30 minutes (twice the interval), the dump is classified as `failed`. 28 | The `in progress` status indicates that a dump is still ongoing. 29 | 30 | -------------------------------------------------------------------------------- /docs/commands/list-transformers.md: -------------------------------------------------------------------------------- 1 | ## list-transformers command 2 | 3 | The `list-transformers` command provides a list of all the allowed transformers, including both standard and advanced 4 | transformers. 
This list can be helpful for searching for an appropriate transformer for your data transformation needs. 5 | 6 | To show a list of available transformers, use the following command: 7 | 8 | ```shell 9 | greenmask --config=config.yml list-transformers 10 | ``` 11 | 12 | Supported flags: 13 | 14 | * `--format` — allows you to select the output format. There are two options available: `text` or `json`. The 15 | default setting is `text`. 16 | 17 | Example of `list-transformers` output: 18 | 19 | ![list_transformers_screen.png](../assets/list_transformers_screen_2.png) 20 | 21 | When using the `list-transformers` command, you receive a list of available transformers with essential information 22 | about each of them. Below are the key parameters for each transformer: 23 | 24 | * `NAME` — the name of the transformer 25 | * `DESCRIPTION` — a brief description of what the transformer does 26 | * `COLUMN PARAMETER NAME` — name of a column or columns affected by transformation 27 | * `SUPPORTED TYPES` — lists the supported value types 28 | 29 | The JSON call `greenmask --config=config.yml list-transformers --format=json` has the same attributes: 30 | 31 | ```json title="JSON format output" 32 | [ 33 | { 34 | "name": "Cmd", 35 | "description": "Transform data via external program using stdin and stdout interaction", 36 | "parameters": [ 37 | { 38 | "name": "columns", 39 | "supported_types": [ 40 | "any" 41 | ] 42 | } 43 | ] 44 | }, 45 | { 46 | "name": "Dict", 47 | "description": "Replace values matched by dictionary keys", 48 | "parameters": [ 49 | { 50 | "name": "column", 51 | "supported_types": [ 52 | "any" 53 | ] 54 | } 55 | ] 56 | } 57 | ] 58 | ``` -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Prerequisites 4 | 5 | * Ensure that you have PostgreSQL utilities preinstalled, matching the **major version** 6 | of
your destination server. 7 | 8 | * If you are building Greenmask from source, make sure you have the `make` utility installed. 9 | 10 | ## Via docker 11 | 12 | You can find the docker images in the: 13 | 14 | 1. [Docker-hub page](https://hub.docker.com/r/greenmask/greenmask) 15 | 16 | To run the greenmask container from DockerHub, use the following command: 17 | ```shell 18 | docker run -it greenmask/greenmask:latest 19 | ``` 20 | 21 | 2. GitHub container registry 22 | 23 | To run the greenmask container from GitHub registry, use the following command: 24 | ```shell 25 | docker run -it ghcr.io/greenmaskio/greenmask:latest 26 | ``` 27 | 28 | !!! info 29 | 30 | For pre-releases (rc, beta, etc.), use explicit tags like `v0.2.0b2`. 31 | 32 | ## Via brew 33 | 34 | The greenmask build is [available in brew](https://formulae.brew.sh/formula/greenmask#default), 35 | but only a production build is available. To install the greenmask via brew, use the following command: 36 | 37 | ```shell 38 | brew install greenmask 39 | ``` 40 | 41 | ## From source 42 | 43 | 1. Clone the Greenmask repository by using the following command: 44 | 45 | ```bash 46 | git clone git@github.com:GreenmaskIO/greenmask.git 47 | ``` 48 | 49 | 2. Once the repository is cloned, execute the following command to build Greenmask: 50 | 51 | ```bash 52 | make build 53 | ``` 54 | 55 | After completing the build process, you will find the binary named `greenmask` in the root directory of the repository. 56 | Execute the binary to start using Greenmask. 57 | 58 | ## Playground 59 | 60 | Greenmask Playground is a sandbox environment for your experiments in Docker with sample databases included to help you 61 | try Greenmask without any additional actions. Read the [Playground](playground.md) guide to learn more. 
-------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block announce %} 4 | A new version 0.2.12 (2025.05.28) is released 5 | {% endblock %} 6 | 7 | {% block outdated %} 8 | You're not viewing the latest version. 9 | 10 | Click here to go to latest. 11 | 12 | {% endblock %} -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_0_beta.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.0 Beta 2 | 3 | We are excited to announce the beta release of Greenmask, a versatile and open-source utility for PostgreSQL logical backup dumping, anonymization, and restoration. Greenmask is perfect for routine backup and restoration tasks. It facilitates anonymization and data masking for staging environments and analytics. 4 | 5 | This release introduces a range of features aimed at enhancing database management and security. 6 | 7 | ## Key features 8 | 9 | - Cross-platform support — fully written in Go without platform dependencies. 10 | - Type-safe database operations — validates and encodes data, maintaining integrity. 11 | - Transformation validation — ensures data transformations are correct and maintainable. 12 | - Partitioned table support — simplifies configuration for partitioned tables. 13 | - Stateless and backward compatible — works alongside standard PostgreSQL utilities. 14 | - Parallel execution — enhances efficiency in dumping and restoration processes. 15 | - Multiple storage options — supports both local (directory) and remote (S3-like) storage solutions. 16 | 17 | ## Download 18 | 19 | To download the Greenmask binary compatible with your system, see the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.0-beta). 
20 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_10.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.10 2 | 3 | This release introduces improvements and bug fixes 4 | 5 | ## Changes 6 | 7 | * Fixed panic caused in `RandomString` transformer 8 | * Fixed wrong table size calculation. Now the table size includes TOAST table size 9 | * Added custom transformer interaction API defaults if not set 10 | * Changed docker workdir to greenmask home 11 | * Removed bucket name from object path prefix 12 | 13 | ## Assets 14 | 15 | To download the Greenmask binary compatible with your system, see 16 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.10). 17 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_11.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.11 2 | 3 | This release introduces improvements and bug fixes 4 | 5 | ## Changes 6 | 7 | * Added support for generated columns in the table 8 | * Fixed transformer parameters encoding issue caused by spf13/viper 9 | * Fixed table scoring for transformed table 10 | * Refactored connection management logic in restore command - fixes connection idle timeout 11 | 12 | ## Assets 13 | 14 | To download the Greenmask binary compatible with your system, see 15 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.11). 
16 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_12.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.12 2 | 3 | This release introduces improvements and bug fixes. 4 | 5 | ## Changes 6 | 7 | * Fixed a config decoding issue 8 | * Fixed TOC entries merge behavior when data section is empty 9 | * Fixed integration tests for S3 storage 10 | 11 | ## Assets 12 | 13 | To download the Greenmask binary compatible with your system, see 14 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.12). 15 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_13.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.13 2 | 3 | This release introduces only improvements in documentation deployment. The core greenmask utility does not contain any changes. 4 | 5 | ## Changes 6 | 7 | * Added documentation deployment with versioning 8 | 9 | ## Assets 10 | 11 | To download the Greenmask binary compatible with your system, see 12 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.13). -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_14.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.14 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Changes 6 | 7 | * Fixed a panic in the Large Object dumper 8 | 9 | ## Assets 10 | 11 | To download the Greenmask binary compatible with your system, see 12 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.14). 
-------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_2.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.2 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Fixes 6 | 7 | - Fixed bug when raw COPY lines were parsed incorrectly 8 | - Fixed `--version` parameter behavior 9 | - Fixed `--dbname` parameter - now it correctly works with PostgreSQL connection string in URI format `postgresql:///` 10 | 11 | ## Assets 12 | 13 | To download the Greenmask binary compatible with your system, see the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.2). 14 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_3.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.3 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Fixes 6 | 7 | - Fixed the JSON transformer's parsing for the `operations` fields 8 | - Fixed database connection string builder in `pg_restore` and `pg_dump` 9 | 10 | ## Assets 11 | 12 | To download the Greenmask binary compatible with your system, see the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.3). 13 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_4.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.4 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Fixes 6 | 7 | - Fixed database connection string behavior fields 8 | 9 | ## Assets 10 | 11 | To download the Greenmask binary compatible with your system, see the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.4). 
12 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_5.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.5 2 | 3 | This release introduces a new Greenmask command, improvements, bug fixes, and numerous documentation updates. 4 | 5 | ## New features 6 | 7 | Added a new Greenmask CLI command—[show-transformer](../commands/show-transformer.md) that shows detailed information about a specified transformer. 8 | 9 | ## Improvements 10 | 11 | - The [Hash transformer](../built_in_transformers/standard_transformers/hash.md) has been completely remastered and now has the `function` parameter to choose from some hash algorithm options and the `max_length` parameter to truncate the hash tail. 12 | - Split information about transformers between the `list-transformers` and new `show-transformer` CLI commands, which allows for more comprehensible and useful outputs for both commands 13 | - Added error severity for the `Cmd` parameter validator 14 | - Improved UX for the Greenmask release binaries 15 | 16 | ## Fixes 17 | 18 | - Fixed metadata enrichment for validation warnings caused by `RawValueValidator` 19 | - Fixed a typo in the `credit_card` value for the `type` parameter of the `Masking` transformer 20 | - Fixed Greenmask Playground environment variables and the `cleanup` command 21 | - Fixed `list-dump`, `list-transformers`, and `restore` commands exit code on error 22 | 23 | ## Assets 24 | 25 | To download the Greenmask binary compatible with your system, see the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.5). 
26 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_6.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.6 2 | 3 | This is a minor release that introduces a bug hotfix 4 | 5 | ## Fixes 6 | 7 | - Fixed uncontrolled buffer growth in the restore command 8 | 9 | ## Assets 10 | 11 | To download the Greenmask binary compatible with your system, see the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.6). 12 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_8.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.8 2 | 3 | This release introduces improvements and bug fixes 4 | 5 | ## Improvements 6 | 7 | * Implemented the `--exit-on-error` parameter for the `pg_restore` run. However, it currently has no effect on `data` section restoration: if any error occurs in the `data` section, greenmask exits with the error whether or not `--exit-on-error` was provided. This might be fixed later 8 | 9 | ## Fixes 10 | 11 | * Fixed dependent objects dropping when running with the `restore` command with the `--clean` parameter. Useful when restoring and overriding only required tables 12 | * Fixed `show-dump` command output in text mode 13 | * Disabled CGO. Fixes a problem where the binary downloaded from the repo could not run 14 | * Fixed `delete` dump operation 15 | 16 | 17 | ## Assets 18 | 19 | To download the Greenmask binary compatible with your system, see 20 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.8). 
21 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_1_9.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.1.9 2 | 3 | This release introduces improvements and bug fixes 4 | 5 | ## Improvements 6 | 7 | * Implemented table scoring according to the table size and transformation costs. This correctly spreads the table 8 | dumping across the requested worker pool and reduces the execution time. Now greenmask introspects the table size, 9 | adds the transformation scoring using the formula 10 | `score = tableSizeInBytes + (tableSizeInBytes * 0.03 * tableTransformationsCount)`, and uses the strategy "Largest 11 | First". The problem is described [here](https://github.com/GreenmaskIO/greenmask/discussions/50) 12 | * Introduced `no_verify_ssl` parameter for S3 storage 13 | * Adjusted Dockerfile 14 | * Changed entrypoint to `greenmask` binary 15 | * The `greenmask` container now runs under `greenmask` user and groups 16 | * Refactored storage config structure. Now it contains the `type` that is used for the storage type determination 17 | * Most of the attributes may be overridden with environment variables where the letters are capitalized and the dots 18 | are replaced with underscores. For instance, the setting `storage.type` might be represented with the environment 19 | variable `STORAGE_TYPE` 20 | * Parameter `--config` is not required anymore. This simplifies the greenmask utility user experience 21 | * Directory storage set as the default 22 | * Set the default temporary directory as `/tmp` 23 | * Added environment variable section to the configuration docs 24 | 25 | ## Fixes 26 | 27 | * Fixed `S3_REGION` environment variable usage. 
Tested cases where the S3 storage is set up using `S3` variables that 28 | are used by `github.com/aws/aws-sdk-go` 29 | * Updated project dependencies to the latest version 30 | 31 | ## Assets 32 | 33 | To download the Greenmask binary compatible with your system, see 34 | the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.9). 35 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_10.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.10 2 | 3 | ## Changes 4 | 5 | * Bumped dependencies and upgraded Go to 1.24 [#285](https://github.com/GreenmaskIO/greenmask/pull/285). 6 | * Changed Oid type in TOC archive library [#286](https://github.com/GreenmaskIO/greenmask/pull/286). 7 | Closes [#284](https://github.com/GreenmaskIO/greenmask/issues/284) 8 | * Revised documentation related to the log level values [#287](https://github.com/GreenmaskIO/greenmask/pull/287). 9 | Closes [#283](https://github.com/GreenmaskIO/greenmask/issues/283) 10 | 11 | #### Full Changelog: [v0.2.9...v0.2.10](https://github.com/GreenmaskIO/greenmask/compare/v0.2.9...v0.2.10) 12 | 13 | ## Links 14 | 15 | Feel free to reach out to us if you have any questions or need assistance: 16 | 17 | * [Discord](https://discord.gg/tAJegUKSTB) 18 | * [Email](mailto:support@greenmask.io) 19 | * [Twitter](https://twitter.com/GreenmaskIO) 20 | * [Telegram [RU]](https://t.me/greenmask_ru) 21 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 22 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_11.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.11 2 | 3 | ## Changes 4 | 5 | * Catch up v0.2.10 lost changes. Change Oid type to uint32 [#289](https://github.com/GreenmaskIO/greenmask/pull/289). 
6 | Closes [#284](https://github.com/GreenmaskIO/greenmask/issues/284). 7 | 8 | #### Full Changelog: [v0.2.10...v0.2.11](https://github.com/GreenmaskIO/greenmask/compare/v0.2.10...v0.2.11) 9 | 10 | ## Links 11 | 12 | Feel free to reach out to us if you have any questions or need assistance: 13 | 14 | * [Discord](https://discord.gg/tAJegUKSTB) 15 | * [Email](mailto:support@greenmask.io) 16 | * [Twitter](https://twitter.com/GreenmaskIO) 17 | * [Telegram [RU]](https://t.me/greenmask_ru) 18 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 19 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_12.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.12 2 | 3 | ## Changes 4 | 5 | * Added support for dynamic parameters in the Replace transformer, allowing values to be dynamically replaced based on 6 | column values. This feature enables spreading the same value across multiple columns. See 7 | the [documentation](https://docs.greenmask.io/latest/built_in_transformers/standard_transformers/replace/) for 8 | examples. [#293](https://github.com/GreenmaskIO/greenmask/pull/293) 9 | * Updated the `--verbose` flag to a boolean type. It is now `true` if provided and `false` 10 | otherwise. [#282](https://github.com/GreenmaskIO/greenmask/pull/282) 11 | * Fixed a bug in the `RandomDate` transformer where minutes were not truncated as 12 | expected. [#298](https://github.com/GreenmaskIO/greenmask/pull/298) 13 | * Updated go dependencies to the latest. 
[#304](https://github.com/GreenmaskIO/greenmask/pull/304) 14 | 15 | #### Full Changelog: [v0.2.11...v0.2.12](https://github.com/GreenmaskIO/greenmask/compare/v0.2.11...v0.2.12) 16 | 17 | ## Links 18 | 19 | Feel free to reach out to us if you have any questions or need assistance: 20 | 21 | * [Discord](https://discord.gg/tAJegUKSTB) 22 | * [Email](mailto:support@greenmask.io) 23 | * [Twitter](https://twitter.com/GreenmaskIO) 24 | * [Telegram [RU]](https://t.me/greenmask_ru) 25 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 26 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_2.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.2 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Changes 6 | 7 | * Fixed a case when apply_for_references applies validation to all transformations even if they are not 8 | marked as apply_for_references [#236](https://github.com/GreenmaskIO/greenmask/pull/236). 9 | * Fixed issue with the latest tag disappearing in the documentation [#234](https://github.com/GreenmaskIO/greenmask/pull/234). 
10 | 11 | 12 | #### Full Changelog: [v0.2.1...v0.2.2](https://github.com/GreenmaskIO/greenmask/compare/v0.2.1...v0.2.2) 13 | 14 | ## Links 15 | 16 | Feel free to reach out to us if you have any questions or need assistance: 17 | 18 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 19 | * [Email](mailto:support@greenmask.io) 20 | * [Twitter](https://twitter.com/GreenmaskIO) 21 | * [Telegram](https://t.me/greenmask_community) 22 | * [Discord](https://discord.gg/tAJegUKSTB) 23 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 24 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_3.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.3 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Changes 6 | 7 | * Fixed an issue where the partitioned table itself was executed in the restore worker, resulting in a "file not found" 8 | error in storage. Closes bug: restoring partitioned tables 9 | fails [#238](https://github.com/GreenmaskIO/greenmask/pull/238) [#242](https://github.com/GreenmaskIO/greenmask/pull/242). 10 | * Fixed template function availability [#239](https://github.com/GreenmaskIO/greenmask/issues/239). 
Renamed methods 11 | according to the documentation: GetColumnRawValue is now GetRawColumnValue, and SetColumnRawValue is now 12 | SetRawColumnValue [#242](https://github.com/GreenmaskIO/greenmask/pull/242) 13 | * Resolved an issue where Dump.createTocEntries processed partitioned tables as if they were physical entities, despite 14 | being logical [#241](https://github.com/GreenmaskIO/greenmask/pull/241) 15 | * Corrected merging in the pre-data, data, and post-data sections, which previously caused a panic in dump command when 16 | the post-data section was excluded [#241](https://github.com/GreenmaskIO/greenmask/pull/241) 17 | * Fixed an issue where dumps created with --load-via-partition-root did not use the root partition table in --inserts 18 | generation during restoration [#241](https://github.com/GreenmaskIO/greenmask/pull/241) 19 | 20 | #### Full Changelog: [v0.2.2...v0.2.3](https://github.com/GreenmaskIO/greenmask/compare/v0.2.2...v0.2.3) 21 | 22 | ## Links 23 | 24 | Feel free to reach out to us if you have any questions or need assistance: 25 | 26 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 27 | * [Email](mailto:support@greenmask.io) 28 | * [Twitter](https://twitter.com/GreenmaskIO) 29 | * [Telegram](https://t.me/greenmask_community) 30 | * [Discord](https://discord.gg/tAJegUKSTB) 31 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 32 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_4.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.4 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Changes 6 | 7 | * Fixed a bug [#244](https://github.com/GreenmaskIO/greenmask/issues/244) that caused incorrect subset and transformer 8 | inheritance behavior. See the merge request [#245](https://github.com/GreenmaskIO/greenmask/pull/245). 
9 | 10 | 11 | #### Full Changelog: [v0.2.3...v0.2.4](https://github.com/GreenmaskIO/greenmask/compare/v0.2.3...v0.2.4) 12 | 13 | ## Links 14 | 15 | Feel free to reach out to us if you have any questions or need assistance: 16 | 17 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 18 | * [Email](mailto:support@greenmask.io) 19 | * [Twitter](https://twitter.com/GreenmaskIO) 20 | * [Telegram](https://t.me/greenmask_community) 21 | * [Discord](https://discord.gg/tAJegUKSTB) 22 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 23 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_5.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.5 2 | 3 | This release introduces bug fixes. 4 | 5 | ## Changes 6 | 7 | * Fixed a bug where a subset query was not generated when provided [#247](https://github.com/GreenmaskIO/greenmask/pull/247). 8 | 9 | 10 | #### Full Changelog: [v0.2.4...v0.2.5](https://github.com/GreenmaskIO/greenmask/compare/v0.2.4...v0.2.5) 11 | 12 | ## Links 13 | 14 | Feel free to reach out to us if you have any questions or need assistance: 15 | 16 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 17 | * [Email](mailto:support@greenmask.io) 18 | * [Twitter](https://twitter.com/GreenmaskIO) 19 | * [Telegram](https://t.me/greenmask_community) 20 | * [Discord](https://discord.gg/tAJegUKSTB) 21 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 22 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_6.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.6 2 | 3 | This release introduces new features and bug fixes. 4 | 5 | ## Changes 6 | 7 | * Introduces `--disable-triggers`, `--use-session-replication-role-replica` and `--superuser` options 8 | for the restore command. 
It allows to disable triggers during data section restore [#248](https://github.com/GreenmaskIO/greenmask/pull/252). 9 | Closes feature request [#228](https://github.com/GreenmaskIO/greenmask/issues/228) 10 | * Fix skipping unknown type when silent is true [#251](https://github.com/GreenmaskIO/greenmask/pull/251) 11 | * Added sonar qube quality gate badge [#250](https://github.com/GreenmaskIO/greenmask/pull/250) 12 | 13 | 14 | #### Full Changelog: [v0.2.5...v0.2.6](https://github.com/GreenmaskIO/greenmask/compare/v0.2.5...v0.2.6) 15 | 16 | ## Links 17 | 18 | Feel free to reach out to us if you have any questions or need assistance: 19 | 20 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 21 | * [Email](mailto:support@greenmask.io) 22 | * [Twitter](https://twitter.com/GreenmaskIO) 23 | * [Telegram](https://t.me/greenmask_community) 24 | * [Discord](https://discord.gg/tAJegUKSTB) 25 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 26 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_7.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.7 2 | 3 | This release fixes several bugs. 
4 | 5 | ## Changes 6 | 7 | * Fixed a case where a directory dump converted into a single file using 8 | `pg_restore --file=1738263512345.sql` was corrupted due to a lost semicolon [#255](https://github.com/GreenmaskIO/greenmask/issues/255) 9 | * Updated go dependencies [#256](https://github.com/GreenmaskIO/greenmask/issues/256) 10 | 11 | 12 | #### Full Changelog: [v0.2.6...v0.2.7](https://github.com/GreenmaskIO/greenmask/compare/v0.2.6...v0.2.7) 13 | 14 | ## Links 15 | 16 | Feel free to reach out to us if you have any questions or need assistance: 17 | 18 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 19 | * [Email](mailto:support@greenmask.io) 20 | * [Twitter](https://twitter.com/GreenmaskIO) 21 | * [Telegram](https://t.me/greenmask_community) 22 | * [Discord](https://discord.gg/tAJegUKSTB) 23 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 24 | -------------------------------------------------------------------------------- /docs/release_notes/greenmask_0_2_9.md: -------------------------------------------------------------------------------- 1 | # Greenmask 0.2.9 2 | 3 | This release introduces a new transformer and fixes some bugs. 4 | 5 | ## Changes 6 | 7 | * Implemented [RandomCompany](https://docs.greenmask.io/latest/built_in_transformers/standard_transformers/random_company/) transformer - 8 | it's a multi-column transformer that generates company data with attributes `CompanyName` and `CompanySuffix` 9 | [#273](https://github.com/GreenmaskIO/greenmask/pull/273) [#274](https://github.com/GreenmaskIO/greenmask/pull/274). 10 | * Fixed a case when transformers with column containers were not printed on `greenmask list-transformers` command 11 | call [#275](https://github.com/GreenmaskIO/greenmask/pull/275). 12 | * Fixed `RandomEmail` transformer bug when an incorrect buffer size for hex-encoded symbols resulted in a `\0` 13 | appearing in the string [#278](https://github.com/GreenmaskIO/greenmask/pull/278). 
14 | * Fixed typo in database_subset.md docs [#271](https://github.com/GreenmaskIO/greenmask/pull/271) 15 | * Revised README.md [#280](https://github.com/GreenmaskIO/greenmask/pull/280) 16 | 17 | #### Full Changelog: [v0.2.8...v0.2.9](https://github.com/GreenmaskIO/greenmask/compare/v0.2.8...v0.2.9) 18 | 19 | ## Links 20 | 21 | Feel free to reach out to us if you have any questions or need assistance: 22 | 23 | * [Greenmask Roadmap](https://github.com/orgs/GreenmaskIO/projects/6) 24 | * [Email](mailto:support@greenmask.io) 25 | * [Twitter](https://twitter.com/GreenmaskIO) 26 | * [Telegram [RU]](https://t.me/greenmask_ru) 27 | * [Discord](https://discord.gg/tAJegUKSTB) 28 | * [DockerHub](https://hub.docker.com/r/greenmask/greenmask) 29 | -------------------------------------------------------------------------------- /internal/db/postgres/cmd/validate_utils/utils.go: -------------------------------------------------------------------------------- 1 | package validate_utils 2 | 3 | import ( 4 | "slices" 5 | 6 | "github.com/greenmaskio/greenmask/internal/db/postgres/entries" 7 | "github.com/greenmaskio/greenmask/pkg/toolkit" 8 | ) 9 | /* endOfFileSeq is the two-byte sequence `\.`; LineIsEndOfData uses its length and matches the same two bytes. */ 10 | var endOfFileSeq = []byte(`\.`) 11 | /* nullStringValue is the text getStringFromRawValue returns for a value whose IsNull flag is set. */ 12 | const nullStringValue = "NULL" 13 | /* getAffectedColumns returns the set of column names yielded by GetAffectedColumns across every transformer in t.TransformersContext. The result is an empty, non-nil map when there are none. */ 14 | func getAffectedColumns(t *entries.Table) map[string]struct{} { 15 | affectedColumns := make(map[string]struct{}) 16 | for _, tr := range t.TransformersContext { 17 | ac := tr.Transformer.GetAffectedColumns() 18 | for _, name := range ac { 19 | affectedColumns[name] = struct{}{} 20 | } 21 | } 22 | return affectedColumns 23 | } 24 | /* LineIsEndOfData reports whether line is exactly the two bytes backslash and dot. The length comparison is evaluated first, so the indexed reads only run for lines of the same length as endOfFileSeq. */ 25 | func LineIsEndOfData(line []byte) bool { 26 | return len(endOfFileSeq) == len(line) && line[0] == '\\' && line[1] == '.' 
27 | } 28 | /* ValuesEqual reports whether a and b carry the same IsNull flag and equal Data bytes. Both pointers are dereferenced unconditionally, so neither may be nil. */ 29 | func ValuesEqual(a *toolkit.RawValue, b *toolkit.RawValue) bool { 30 | return a.IsNull == b.IsNull && slices.Equal(a.Data, b.Data) 31 | } 32 | /* getPrimaryKeyConstraintColumns returns the primary-key columns of t keyed by each column's index in t.Columns. It returns nil when t.Constraints holds no constraint whose Type() is toolkit.PkConstraintType, and panics when a primary-key attnum matches no column's Num. The first matching constraint is type-asserted to *toolkit.PrimaryKey without a check. */ 33 | func getPrimaryKeyConstraintColumns(t *entries.Table) map[int]*toolkit.Column { 34 | idx := slices.IndexFunc(t.Constraints, func(constraint toolkit.Constraint) bool { 35 | return constraint.Type() == toolkit.PkConstraintType 36 | }) 37 | if idx == -1 { 38 | return nil 39 | } 40 | pk := t.Constraints[idx].(*toolkit.PrimaryKey) 41 | 42 | columns := make(map[int]*toolkit.Column, len(pk.Columns)) 43 | 44 | for _, attNum := range pk.Columns { 45 | columnIdx := slices.IndexFunc(t.Columns, func(column *toolkit.Column) bool { 46 | return column.Num == attNum 47 | }) 48 | if columnIdx == -1 { 49 | panic("unable to find column by attnum") 50 | } 51 | columns[columnIdx] = t.Columns[columnIdx] 52 | } 53 | return columns 54 | } 55 | /* getStringFromRawValue returns nullStringValue when v.IsNull is set, otherwise v.Data converted to a string. */ 56 | func getStringFromRawValue(v *toolkit.RawValue) string { 57 | if v.IsNull { 58 | return nullStringValue 59 | } 60 | return string(v.Data) 61 | } 62 | -------------------------------------------------------------------------------- /internal/db/postgres/context/pg_catalog_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package context 16 | 17 | import ( 18 | "fmt" 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | /* TestBuildTableSearchQuery checks that buildTableSearchQuery returns no error for a sample set of include/exclude table, schema, table-data and foreign-data patterns. The generated query is only printed; it is not compared against an expected value. */ 24 | func TestBuildTableSearchQuery(t *testing.T) { 25 | includeTable := []string{"bookings.*"} 26 | excludeTable := []string{"booki*.boarding_pas*", "b?*.seats"} 27 | includeSchema := []string{"booki*"} 28 | excludeSchema := []string{"public*[[:digit:]]*1"} 29 | excludeTableData := []string{"bookings.flights"} 30 | includeForeignData := []string{"myserver"} 31 | res, err := buildTableSearchQuery(includeTable, excludeTable, excludeTableData, 32 | includeForeignData, includeSchema, excludeSchema) 33 | assert.NoError(t, err) 34 | fmt.Println(res) 35 | } 36 | /* TestBuildSchemaIntrospectionQuery checks that buildSchemaIntrospectionQuery returns no error for a sample set of include/exclude table, schema and foreign-data patterns. The generated query is only printed; it is not compared against an expected value. */ 37 | func TestBuildSchemaIntrospectionQuery(t *testing.T) { 38 | includeTable := []string{"bookings.*"} 39 | excludeTable := []string{"booki*.boarding_pas*", "b?*.seats"} 40 | includeSchema := []string{"booki*"} 41 | excludeSchema := []string{"public*[[:digit:]]*1"} 42 | includeForeignData := []string{"myserver"} 43 | res, err := buildSchemaIntrospectionQuery(includeTable, excludeTable, 44 | includeForeignData, includeSchema, excludeSchema) 45 | assert.NoError(t, err) 46 | fmt.Println(res) 47 | } 48 | -------------------------------------------------------------------------------- /internal/db/postgres/context/transformers.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package context 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | 21 | transformersUtils "github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils" 22 | "github.com/greenmaskio/greenmask/internal/domains" 23 | "github.com/greenmaskio/greenmask/pkg/toolkit" 24 | ) 25 | 26 | func initTransformer( 27 | ctx context.Context, d *toolkit.Driver, 28 | c *domains.TransformerConfig, 29 | r *transformersUtils.TransformerRegistry, 30 | ) (*transformersUtils.TransformerContext, toolkit.ValidationWarnings, error) { 31 | var totalWarnings toolkit.ValidationWarnings 32 | td, ok := r.Get(c.Name) 33 | if !ok { 34 | totalWarnings = append(totalWarnings, 35 | toolkit.NewValidationWarning(). 36 | SetMsg("transformer not found"). 37 | AddMeta("SchemaName", d.Table.Schema). 38 | AddMeta("TableName", d.Table.Name). 39 | AddMeta("TransformerName", c.Name). 40 | SetSeverity(toolkit.ErrorValidationSeverity), 41 | ) 42 | return nil, totalWarnings, nil 43 | } 44 | transformer, warnings, err := td.Instance(ctx, d, c.Params, c.DynamicParams, c.When) 45 | if err != nil { 46 | return nil, nil, fmt.Errorf("unable to init transformer: %w", err) 47 | } 48 | return transformer, warnings, nil 49 | } 50 | -------------------------------------------------------------------------------- /internal/db/postgres/dumpers/dumpers.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// DumpTask is a single task that can be executed against a transaction and a
// storage and can describe itself.
type DumpTask interface {
	// Execute runs the task using the provided transaction and storage and
	// returns an error when the task fails.
	Execute(ctx context.Context, tx pgx.Tx, st storages.Storager) error
	// DebugInfo returns a short textual description of the task.
	// NOTE(review): presumably used in logs and error messages - confirm
	// against the callers.
	DebugInfo() string
}
// DumpError describes a failure that happened while dumping a table. It
// carries the table identity, the line counter value reported by the pipeline
// and the underlying error.
type DumpError struct {
	Schema string `json:"schema,omitempty"`
	Table  string `json:"table,omitempty"`
	Line   uint64 `json:"line,omitempty"`
	Err    error  `json:"err,omitempty"`
}

// NewDumpError creates a DumpError for the given table and line that wraps err.
func NewDumpError(schema, table string, line uint64, err error) *DumpError {
	return &DumpError{
		Schema: schema,
		Table:  table,
		Line:   line,
		Err:    err,
	}
}

// Error implements the error interface. The underlying error is formatted
// with %v, so a nil Err is rendered as "<nil>" instead of causing a nil
// pointer panic.
func (de *DumpError) Error() string {
	return fmt.Sprintf("dump error on table %s.%s at line %d: %v", de.Schema, de.Table, de.Line, de.Err)
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause of the dump failure.
func (de *DumpError) Unwrap() error {
	return de.Err
}
// Pipeliner is the interface of a dump data pipeline: it can be initialised,
// fed with dumped data and finalised.
type Pipeliner interface {
	// Dump processes one chunk of dumped data.
	// NOTE(review): PlainDumpPipeline increments its line counter once per
	// call, so data is presumably a single row - confirm against the caller.
	Dump(ctx context.Context, data []byte) error
	// Init prepares the pipeline.
	// NOTE(review): presumably called once before the first Dump - confirm.
	Init(ctx context.Context) error
	// Done releases the pipeline.
	// NOTE(review): presumably called once after the last Dump - confirm.
	Done(ctx context.Context) error
	// CompleteDump finalises the produced output; PlainDumpPipeline writes
	// the COPY termination sequence here.
	CompleteDump() error
}
14 | 15 | package dumpers 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "io" 21 | 22 | "github.com/greenmaskio/greenmask/internal/db/postgres/entries" 23 | "github.com/greenmaskio/greenmask/internal/db/postgres/pgcopy" 24 | ) 25 | 26 | type PlainDumpPipeline struct { 27 | w io.Writer 28 | line uint64 29 | table *entries.Table 30 | } 31 | 32 | func NewPlainDumpPipeline(table *entries.Table, w io.Writer) *PlainDumpPipeline { 33 | return &PlainDumpPipeline{ 34 | table: table, 35 | w: w, 36 | } 37 | } 38 | 39 | func (pdp *PlainDumpPipeline) Init(ctx context.Context) error { 40 | return nil 41 | } 42 | 43 | func (pdp *PlainDumpPipeline) Dump(ctx context.Context, data []byte) (err error) { 44 | pdp.line++ 45 | if _, err := pdp.w.Write(data); err != nil { 46 | return NewDumpError(pdp.table.Schema, pdp.table.Name, pdp.line, err) 47 | } 48 | return nil 49 | } 50 | 51 | func (pdp *PlainDumpPipeline) Done(ctx context.Context) error { 52 | return nil 53 | } 54 | 55 | func (pdp *PlainDumpPipeline) CompleteDump() (err error) { 56 | res := make([]byte, 0, 4) 57 | res = append(res, pgcopy.DefaultCopyTerminationSeq...) 58 | res = append(res, '\n', '\n') 59 | _, err = pdp.w.Write(res) 60 | if err != nil { 61 | return NewDumpError(pdp.table.Schema, pdp.table.Name, pdp.line, fmt.Errorf("error end of dump symbols: %w", err)) 62 | } 63 | return nil 64 | } 65 | -------------------------------------------------------------------------------- /internal/db/postgres/dumpers/sequence.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dumpers 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | 21 | "github.com/jackc/pgx/v5" 22 | 23 | "github.com/greenmaskio/greenmask/internal/db/postgres/entries" 24 | "github.com/greenmaskio/greenmask/internal/storages" 25 | ) 26 | 27 | type SequenceDumper struct { 28 | sequence *entries.Sequence 29 | } 30 | 31 | func NewSequenceDumper(sequence *entries.Sequence) *SequenceDumper { 32 | return &SequenceDumper{ 33 | sequence: sequence, 34 | } 35 | } 36 | 37 | func (sd *SequenceDumper) Execute(ctx context.Context, tx pgx.Tx, st storages.Storager) error { 38 | return nil 39 | } 40 | 41 | func (sd *SequenceDumper) DebugInfo() string { 42 | return fmt.Sprintf("sequence %s.%s", sd.sequence.Schema, sd.sequence.Name) 43 | } 44 | -------------------------------------------------------------------------------- /internal/db/postgres/dumpers/validation_pipeline.go: -------------------------------------------------------------------------------- 1 | package dumpers 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | 8 | "golang.org/x/sync/errgroup" 9 | 10 | "github.com/greenmaskio/greenmask/internal/db/postgres/entries" 11 | ) 12 | 13 | type ValidationPipeline struct { 14 | *TransformationPipeline 15 | } 16 | 17 | func NewValidationPipeline(ctx context.Context, eg *errgroup.Group, table *entries.Table, w io.Writer) (*ValidationPipeline, error) { 18 | tpp, err := NewTransformationPipeline(ctx, eg, table, w) 19 | if err != nil { 20 | return nil, err 21 | } 22 | return &ValidationPipeline{ 23 | 
TransformationPipeline: tpp, 24 | }, err 25 | } 26 | 27 | func (vp *ValidationPipeline) Dump(ctx context.Context, data []byte) (err error) { 28 | _, err = vp.w.Write(data) 29 | if err != nil { 30 | return NewDumpError(vp.table.Schema, vp.table.Name, vp.line, fmt.Errorf("error writing original dumped data: %w", err)) 31 | } 32 | 33 | return vp.TransformationPipeline.Dump(ctx, data) 34 | } 35 | -------------------------------------------------------------------------------- /internal/db/postgres/entries/entry_producer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Entry is implemented by objects that can be represented as a TOC entry.
type Entry interface {
	// Entry builds the toc.Entry that describes the object or returns an
	// error when the entry cannot be produced.
	Entry() (*toc.Entry, error)
	// SetDumpId receives the dump id sequence.
	// NOTE(review): presumably the implementation takes its dump id from the
	// sequence - confirm against the implementations.
	SetDumpId(sequence *toc.DumpIdSequence)
}
// DefaultCopyDelimiter is the tab character used as the default column
// delimiter of COPY data.
const DefaultCopyDelimiter = '\t'

var (
	// DefaultNullSeq is the byte sequence `\N` used as the default NULL marker.
	DefaultNullSeq = []byte("\\N")
	// DefaultCopyTerminationSeq is the byte sequence `\.` used as the default
	// end-of-data marker.
	DefaultCopyTerminationSeq = []byte("\\.")
)
14 | 15 | package pgdump 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/assert" 21 | ) 22 | 23 | func TestParser_AdaptRegexp(t *testing.T) { 24 | testStr := `asda"As"?as*a"Te""sT"` 25 | expectedStr := `^(asdaAs.as.*aTe"sT)$` 26 | res, err := AdaptRegexp(testStr) 27 | assert.NoError(t, err) 28 | assert.Equal(t, expectedStr, res) 29 | } 30 | -------------------------------------------------------------------------------- /internal/db/postgres/restorers/sequence.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package restorers 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | 21 | "github.com/jackc/pgx/v5" 22 | 23 | "github.com/greenmaskio/greenmask/internal/db/postgres/toc" 24 | "github.com/greenmaskio/greenmask/internal/db/postgres/utils" 25 | ) 26 | 27 | type SequenceRestorer struct { 28 | Entry *toc.Entry 29 | } 30 | 31 | func NewSequenceRestorer(entry *toc.Entry) *SequenceRestorer { 32 | return &SequenceRestorer{ 33 | Entry: entry, 34 | } 35 | } 36 | 37 | func (td *SequenceRestorer) GetEntry() *toc.Entry { 38 | return td.Entry 39 | } 40 | 41 | func (td *SequenceRestorer) Execute(ctx context.Context, conn utils.PGConnector) error { 42 | err := conn.WithTx(ctx, func(ctx context.Context, tx pgx.Tx) error { 43 | if td.Entry.Defn == nil { 44 | return fmt.Errorf("received nil pointer intead of sequence") 45 | } 46 | _, err := tx.Exec(ctx, *td.Entry.Defn) 47 | if err != nil { 48 | return fmt.Errorf("unable to apply sequence set val: %w", err) 49 | } 50 | return nil 51 | }) 52 | if err != nil { 53 | return fmt.Errorf("cannot commit transaction (restoring %s): %w", td.DebugInfo(), err) 54 | } 55 | return nil 56 | } 57 | 58 | func (td *SequenceRestorer) DebugInfo() string { 59 | return fmt.Sprintf("sequence %s.%s", *td.Entry.Namespace, *td.Entry.Tag) 60 | } 61 | -------------------------------------------------------------------------------- /internal/db/postgres/restorers/sequence_test.go: -------------------------------------------------------------------------------- 1 | package restorers 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/greenmaskio/greenmask/internal/db/postgres/toc" 7 | "github.com/greenmaskio/greenmask/internal/db/postgres/utils" 8 | ) 9 | 10 | func (s *restoresSuite) Test_SequencesRestorer_Execute() { 11 | ctx := context.Background() 12 | pgConn, err := s.GetConnection(ctx) 13 | s.Require().NoError(err) 14 | conn := utils.NewPGConn(pgConn) 15 | 16 | s.Run("basic", func() { 17 | 18 | _, err = pgConn.Exec(ctx, "CREATE SEQUENCE 
_test_seq_1231") 19 | s.Require().NoError(err) 20 | 21 | schemaName := "public" 22 | seqName := "_test_seq_123" 23 | data := "SELECT pg_catalog.setval('public._test_seq_1231', 2, false);\n" 24 | entry := &toc.Entry{ 25 | Namespace: &schemaName, 26 | Tag: &seqName, 27 | Defn: &data, 28 | } 29 | 30 | seq := NewSequenceRestorer(entry) 31 | err = seq.Execute(ctx, conn) 32 | s.Require().NoError(err) 33 | 34 | var lastVal int 35 | err = pgConn.QueryRow(ctx, "SELECT last_value FROM _test_seq_1231"). 36 | Scan(&lastVal) 37 | s.Require().NoError(err) 38 | s.Require().Equal(2, lastVal) 39 | }) 40 | 41 | s.Run("Defn is nil error expected", func() { 42 | 43 | _, err = pgConn.Exec(ctx, "CREATE SEQUENCE _test_seq_1232") 44 | s.Require().NoError(err) 45 | 46 | schemaName := "public" 47 | seqName := "_test_seq_123" 48 | entry := &toc.Entry{ 49 | Namespace: &schemaName, 50 | Tag: &seqName, 51 | } 52 | 53 | seq := NewSequenceRestorer(entry) 54 | err = seq.Execute(ctx, conn) 55 | s.Require().Error(err) 56 | 57 | var lastVal int 58 | err = pgConn.QueryRow(ctx, "SELECT last_value FROM _test_seq_1232"). 
59 | Scan(&lastVal) 60 | s.Require().NoError(err) 61 | s.Require().Equal(1, lastVal) 62 | }) 63 | } 64 | -------------------------------------------------------------------------------- /internal/db/postgres/subset/component_link.go: -------------------------------------------------------------------------------- 1 | package subset 2 | 3 | type ComponentLink struct { 4 | idx int 5 | component *Component 6 | } 7 | 8 | func NewComponentLink(idx int, c *Component) *ComponentLink { 9 | return &ComponentLink{ 10 | idx: idx, 11 | component: c, 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /internal/db/postgres/subset/condenced_edge.go: -------------------------------------------------------------------------------- 1 | package subset 2 | 3 | type CondensedEdge struct { 4 | id int 5 | from *ComponentLink 6 | to *ComponentLink 7 | originalEdge *Edge 8 | } 9 | 10 | func NewCondensedEdge(id int, from, to *ComponentLink, originalEdge *Edge) *CondensedEdge { 11 | return &CondensedEdge{ 12 | id: id, 13 | from: from, 14 | to: to, 15 | originalEdge: originalEdge, 16 | } 17 | } 18 | 19 | func (e *CondensedEdge) hasPolymorphicExpressions() bool { 20 | return len(e.originalEdge.from.polymorphicExprs) > 0 || len(e.originalEdge.to.polymorphicExprs) > 0 21 | } 22 | 23 | // sortCondensedEdges - returns condensed graph vertices in topological order 24 | func sortCondensedEdges(graph [][]*CondensedEdge) []int { 25 | stack := make([]int, 0) 26 | visited := make([]bool, len(graph)) 27 | for i := range graph { 28 | if !visited[i] { 29 | topologicalSortDfs(graph, i, visited, &stack) 30 | } 31 | } 32 | return stack 33 | } 34 | 35 | // topologicalSortDfs - recursive function to visit all vertices of the graph 36 | func topologicalSortDfs(graph [][]*CondensedEdge, v int, visited []bool, stack *[]int) { 37 | visited[v] = true 38 | for _, edge := range graph[v] { 39 | if !visited[edge.to.idx] { 40 | topologicalSortDfs(graph, edge.to.idx, 
// cteQuery accumulates named CTE items for a component and renders them into
// a single recursive query.
type cteQuery struct {
	// items are the CTE definitions in the order they were added.
	items []*cteItem
	// c is the component the query is generated for.
	c *Component
}

// newCteQuery creates an empty cteQuery bound to the component c.
func newCteQuery(c *Component) *cteQuery {
	return &cteQuery{
		c: c,
	}
}

// addItem appends a CTE definition with the given name and query text.
func (c *cteQuery) addItem(name, query string) {
	c.items = append(c.items, &cteItem{
		name:  name,
		query: query,
	})
}

// generateQuery renders the "WITH RECURSIVE ..." statement that selects the
// rows of targetTable whose primary key is present in the
// "<schema>__<table>__ids" CTE of that table.
//
// It panics when the component has more than one grouped cycle.
// NOTE(review): c.c.cycles[0] is indexed without a length check, so a
// component without cycles would panic here - confirm callers guarantee it.
func (c *cteQuery) generateQuery(targetTable *entries.Table) string {
	var queries []string
	var excludedCteQueries []string
	if len(c.c.groupedCycles) > 1 {
		panic("FIXME: found more than one grouped cycle")
	}
	// Collect the "<schema>__<table>__ids" names of every table that starts
	// an edge of the first cycle, except the target table itself.
	for _, edge := range c.c.cycles[0] {
		if edge.from.table.Oid == targetTable.Oid {
			continue
		}
		excludedCteQuery := fmt.Sprintf("%s__%s__ids", edge.from.table.Schema, edge.from.table.Name)
		excludedCteQueries = append(excludedCteQueries, excludedCteQuery)
	}

	// Render every item that is not in the exclusion list.
	for _, item := range c.items {
		if slices.Contains(excludedCteQueries, item.name) {
			continue
		}
		queries = append(queries, fmt.Sprintf(" %s AS (%s)", item.name, item.query))
	}
	// Build the primary key column lists for both sides of the IN condition:
	// the target table on the left and its ids CTE on the right.
	var leftTableKeys, rightTableKeys []string
	rightTableName := fmt.Sprintf("%s__%s__ids", targetTable.Schema, targetTable.Name)
	for _, key := range targetTable.PrimaryKey {
		leftTableKeys = append(leftTableKeys, fmt.Sprintf(`"%s"."%s"."%s"`, targetTable.Schema, targetTable.Name, key))
		rightTableKeys = append(rightTableKeys, fmt.Sprintf(`"%s"."%s"`, rightTableName, key))
	}

	resultingQuery := fmt.Sprintf(
		`SELECT * FROM "%s"."%s" WHERE %s IN (SELECT %s FROM "%s")`,
		targetTable.Schema,
		targetTable.Name,
		fmt.Sprintf("(%s)", strings.Join(leftTableKeys, ",")),
		strings.Join(rightTableKeys, ","),
		rightTableName,
	)
	res := fmt.Sprintf("WITH RECURSIVE %s %s", strings.Join(queries, ","), resultingQuery)
	return res
}
-------------------------------------------------------------------------------- /internal/db/postgres/subset/scope_edge.go: -------------------------------------------------------------------------------- 1 | package subset 2 | 3 | type ScopeEdge struct { 4 | scopeId int 5 | originalCondensedEdge *CondensedEdge 6 | isNullable bool 7 | } 8 | -------------------------------------------------------------------------------- /internal/db/postgres/subset/set_queries.go: -------------------------------------------------------------------------------- 1 | package subset 2 | 3 | func SetSubsetQueries(graph *Graph) error { 4 | graph.findSubsetVertexes() 5 | for _, p := range graph.paths { 6 | if isPathForScc(p, graph) { 7 | graph.generateAndSetQueryForScc(p) 8 | } else { 9 | graph.generateAndSetQueryForTable(p) 10 | } 11 | } 12 | return nil 13 | } 14 | -------------------------------------------------------------------------------- /internal/db/postgres/subset/table_link.go: -------------------------------------------------------------------------------- 1 | package subset 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/greenmaskio/greenmask/internal/db/postgres/entries" 7 | "github.com/greenmaskio/greenmask/internal/domains" 8 | ) 9 | 10 | type Key struct { 11 | Name string 12 | Expression string 13 | } 14 | 15 | func (k *Key) GetKeyReference(t *entries.Table) string { 16 | if k.Expression != "" { 17 | return k.Expression 18 | } 19 | return fmt.Sprintf(`"%s"."%s"."%s"`, t.Schema, t.Name, k.Name) 20 | } 21 | 22 | func NewKeysByColumn(cols []string) []*Key { 23 | keys := make([]*Key, 0, len(cols)) 24 | for _, col := range cols { 25 | keys = append(keys, &Key{Name: col}) 26 | } 27 | return keys 28 | } 29 | 30 | func NewKeysByReferencedColumn(cols []*domains.ReferencedColumn) []*Key { 31 | keys := make([]*Key, 0, len(cols)) 32 | for _, col := range cols { 33 | keys = append(keys, &Key{Name: col.Name, Expression: col.Expression}) 34 | } 35 | return keys 36 | } 37 | 38 | type 
TableLink struct { 39 | idx int 40 | table *entries.Table 41 | keys []*Key 42 | // polymorphicExprs - polymorphicExprs for single conditions that are not used to match FK and PK values 43 | // this might be used for polymorphic relations 44 | polymorphicExprs []string 45 | } 46 | 47 | func NewTableLink(idx int, t *entries.Table, keys []*Key, polymorphicExprs []string) *TableLink { 48 | return &TableLink{ 49 | idx: idx, 50 | table: t, 51 | keys: keys, 52 | polymorphicExprs: polymorphicExprs, 53 | } 54 | } 55 | 56 | func (tl *TableLink) Index() int { 57 | return tl.idx 58 | } 59 | 60 | func (tl *TableLink) Table() *entries.Table { 61 | return tl.table 62 | } 63 | 64 | func (tl *TableLink) Keys() []*Key { 65 | return tl.keys 66 | } 67 | 68 | func (tl *TableLink) PolymorphicExprs() []string { 69 | return tl.polymorphicExprs 70 | } 71 | -------------------------------------------------------------------------------- /internal/db/postgres/toc/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// DumpIdSequence generates increasing dump ids. Next may be called from
// several goroutines at the same time.
type DumpIdSequence struct {
	current int32
}

// NewDumpSequence creates a sequence whose first Next call returns initVal+1.
func NewDumpSequence(initVal int32) *DumpIdSequence {
	return &DumpIdSequence{
		current: initVal,
	}
}

// Next atomically increments the sequence and returns the new value.
//
// The value returned by atomic.AddInt32 is used directly: reading dis.current
// again after the increment is a data race and may return a value that was
// produced by a concurrent caller, which yields duplicate ids.
func (dis *DumpIdSequence) Next() int32 {
	return atomic.AddInt32(&dis.current, 1)
}
14 | 15 | package toc 16 | 17 | type Toc struct { 18 | Header *Header 19 | Entries []*Entry 20 | } 21 | 22 | func (t *Toc) Copy() *Toc { 23 | 24 | entries := make([]*Entry, len(t.Entries)) 25 | 26 | for i, entry := range t.Entries { 27 | entries[i] = entry.Copy() 28 | } 29 | 30 | return &Toc{ 31 | Header: t.Header.Copy(), 32 | Entries: entries, 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /internal/db/postgres/toc/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// MakeArchiveVersion packs the three version components into a single int:
// major occupies bits 16-23, minor bits 8-15 and rev bits 0-7, which equals
// (major*256+minor)*256+rev.
func MakeArchiveVersion(major, minor, rev byte) int {
	return int(major)<<16 | int(minor)<<8 | int(rev)
}
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example custom transformer used by tests.
#
# Modes (selected by flags):
#   --print-definition  print the transformer definition as JSON and exit 0
#   --transform         copy stdin to stdout unchanged
#   --validate          print a single validation warning as JSON
#   --meta VALUE        accepted and stored, not used further by this script
# With none of the mode flags the script exits with status 1.

# Handler for the trap below: finish with a success status.
function exit0() {
    exit 0
}

# Signal 15 (SIGTERM) ends the script successfully.
trap 'exit0' 15

transform=false
validate=false
print_config=false

while [[ $# -gt 0 ]]; do
    case $1 in
        --meta)
            metadata="$2"
            # Two single shifts on purpose: "shift 2" would fail and shift
            # nothing when the value is missing, leaving the loop stuck.
            shift # past argument
            shift # past value
            ;;
        --print-definition)
            print_config=true
            shift # past argument
            ;;
        --validate)
            validate=true
            shift # past argument
            ;;
        --transform)
            transform=true
            shift # past argument
            ;;
        -*|--*)
            echo "Unknown option $1"
            exit 1
            ;;
        *)
            # Without this arm a positional argument matched nothing, was
            # never shifted, and the loop never terminated.
            echo "Unexpected argument $1"
            exit 1
            ;;
    esac
done

if [ "$print_config" = "true" ]; then
    echo '{"name":"TwoDatesGen","description":"Generate diff between two dates","parameters":[{"name":"column_a","description":"test1","required":true,"is_column":true,"column_properties":{"affected":true,"allowed_types":["date","timestamp","timestamptz"]}},{"name":"column_b","description":"test2","required":true,"is_column":true,"column_properties":{"affected":true,"allowed_types":["date","timestamp","timestamptz"]}}]}'
    exit 0
elif [ "$transform" = "true" ]; then
    cat
elif [ "$validate" = "true" ]; then
    printf '{"msg": "test validation warning", "severity": "warning"}\n'
else
    exit 1
fi
// defaultEmailProviders is the package-level list of well-known email
// provider domains. Each entry is a bare domain name (no "@" prefix).
var defaultEmailProviders = []string{
	"gmail.com",      // Google Gmail
	"yahoo.com",      // Yahoo Mail
	"outlook.com",    // Microsoft Outlook
	"hotmail.com",    // Microsoft Hotmail (now part of Outlook)
	"aol.com",        // AOL Mail
	"icloud.com",     // Apple iCloud Mail
	"mail.com",       // Mail.com
	"zoho.com",       // Zoho Mail
	"yandex.com",     // Yandex Mail
	"protonmail.com", // ProtonMail
	"gmx.com",        // GMX Mail
	"fastmail.com",   // Fastmail
}
20 | SetRawValueValidator(engineValidator) 21 | 22 | keepNullParameterDefinition = toolkit.MustNewParameterDefinition( 23 | "keep_null", 24 | "indicates that NULL values must not be replaced with transformed values", 25 | ).SetDefaultValue(toolkit.ParamsValue("true")) 26 | 27 | minRatioParameterDefinition = toolkit.MustNewParameterDefinition( 28 | "min_ratio", 29 | "min random percentage for noise", 30 | ).SetDefaultValue(toolkit.ParamsValue("0.05")) 31 | 32 | maxRatioParameterDefinition = toolkit.MustNewParameterDefinition( 33 | "max_ratio", 34 | "max random percentage for noise", 35 | ).SetDefaultValue(toolkit.ParamsValue("0.2")) 36 | 37 | truncateDateParameterDefinition = toolkit.MustNewParameterDefinition( 38 | "truncate", 39 | fmt.Sprintf("truncate date till the part (%s)", strings.Join(truncateParts, ", ")), 40 | ).SetRawValueValidator(validateDateTruncationParameterValue) 41 | ) 42 | 43 | func engineValidator(p *toolkit.ParameterDefinition, v toolkit.ParamsValue) (toolkit.ValidationWarnings, error) { 44 | value := string(v) 45 | if value != RandomEngineParameterName && value != HashEngineParameterName { 46 | return toolkit.ValidationWarnings{ 47 | toolkit.NewValidationWarning(). 48 | SetMsg("Invalid engine value"). 49 | AddMeta("ParameterValue", value). 
50 | SetSeverity(toolkit.ErrorValidationSeverity), 51 | }, nil 52 | } 53 | return nil, nil 54 | } 55 | -------------------------------------------------------------------------------- /internal/db/postgres/transformers/json_context.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "github.com/tidwall/gjson" 5 | 6 | "github.com/greenmaskio/greenmask/pkg/toolkit" 7 | ) 8 | 9 | type JsonContext struct { 10 | exists bool 11 | originalValue any 12 | path string 13 | rc *toolkit.RecordContext 14 | } 15 | 16 | func NewJsonContext() *JsonContext { 17 | return &JsonContext{ 18 | rc: &toolkit.RecordContext{}, 19 | } 20 | } 21 | 22 | func (jc *JsonContext) setValue(data []byte, path string) { 23 | res := gjson.GetBytes(data, path) 24 | jc.originalValue = res.Value() 25 | jc.exists = res.Exists() 26 | jc.path = path 27 | } 28 | 29 | func (jc *JsonContext) setRecord(r *toolkit.Record) { 30 | jc.rc.SetRecord(r) 31 | } 32 | 33 | func (jc *JsonContext) GetPath() string { 34 | return jc.path 35 | } 36 | 37 | func (jc *JsonContext) GetOriginalValue() any { 38 | return jc.originalValue 39 | } 40 | 41 | func (jc *JsonContext) OriginalValueExists() bool { 42 | return jc.exists 43 | } 44 | 45 | func (jc *JsonContext) GetColumnValue(name string) (any, error) { 46 | return jc.rc.GetColumnValue(name) 47 | } 48 | 49 | func (jc *JsonContext) GetRawColumnValue(name string) (any, error) { 50 | return jc.rc.GetRawColumnValue(name) 51 | } 52 | 53 | func (jc *JsonContext) EncodeValueByColumn(name string, v any) (any, error) { 54 | return jc.rc.EncodeValueByColumn(name, v) 55 | } 56 | 57 | func (jc *JsonContext) DecodeValueByColumn(name string, v any) (any, error) { 58 | return jc.rc.DecodeValueByColumn(name, v) 59 | } 60 | 61 | func (jc *JsonContext) EncodeValueByType(name string, v any) (any, error) { 62 | return jc.rc.EncodeValueByType(name, v) 63 | } 64 | 65 | func (jc *JsonContext) DecodeValueByType(name string, 
v any) (any, error) { 66 | return jc.rc.DecodeValueByType(name, v) 67 | } 68 | -------------------------------------------------------------------------------- /internal/db/postgres/transformers/set_null_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package transformers 16 | 17 | import ( 18 | "context" 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | "github.com/stretchr/testify/require" 23 | 24 | "github.com/greenmaskio/greenmask/pkg/toolkit" 25 | ) 26 | 27 | func TestSetNullTransformer_Transform(t *testing.T) { 28 | var columnName = "id" 29 | var originalValue = "1" 30 | var expectedValue = "\\N" 31 | 32 | driver, record := getDriverAndRecord(columnName, originalValue) 33 | 34 | transformerCtx, warnings, err := SetNullTransformerDefinition.Instance( 35 | context.Background(), 36 | driver, map[string]toolkit.ParamsValue{ 37 | "column": toolkit.ParamsValue(columnName), 38 | }, 39 | nil, 40 | "", 41 | ) 42 | require.NoError(t, err) 43 | assert.Empty(t, warnings) 44 | 45 | r, err := transformerCtx.Transformer.Transform( 46 | context.Background(), 47 | record, 48 | ) 49 | require.NoError(t, err) 50 | encoded, err := r.Encode() 51 | require.NoError(t, err) 52 | res, err := encoded.Encode() 53 | require.NoError(t, err) 54 | assert.Equal(t, expectedValue, string(res)) 55 | 
} 56 | -------------------------------------------------------------------------------- /internal/db/postgres/transformers/test/cmd_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2023 Greenmask 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | while read CMD; do 18 | echo '{"9":{"d":"MjAxNy0wNi0xMyAxMToyNDowMCswMg==","n":false},"3":{"d":"MjAxNy0wNi0xMyAxMToyMDowMCswMg==","n":false}}' 19 | done 20 | -------------------------------------------------------------------------------- /internal/db/postgres/transformers/utils/properties.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// MetaKey is the key type of the TransformerProperties.Meta map.
type MetaKey string

// TransformerProperties holds the descriptive attributes of a transformer:
// its name, description, custom flag and free-form metadata.
type TransformerProperties struct {
	Name        string          `json:"name"`
	Description string          `json:"description"`
	IsCustom    bool            `json:"is_custom"`
	Meta        map[MetaKey]any `json:"meta"`
}

// NewTransformerProperties returns properties with the given name and
// description and an empty, ready-to-use Meta map.
func NewTransformerProperties(
	name, description string,
) *TransformerProperties {
	return &TransformerProperties{
		Name:        name,
		Description: description,
		Meta:        make(map[MetaKey]any),
	}
}

// AddMeta stores value under key and returns the receiver so calls can be
// chained.
func (tp *TransformerProperties) AddMeta(key MetaKey, value any) *TransformerProperties {
	// Meta is nil when the struct was built as a literal or decoded from JSON
	// without a "meta" object; writing to a nil map panics, so create it here.
	if tp.Meta == nil {
		tp.Meta = make(map[MetaKey]any)
	}
	tp.Meta[key] = value
	return tp
}

// GetMeta returns the value stored under key and whether it was present.
func (tp *TransformerProperties) GetMeta(key MetaKey) (any, bool) {
	value, ok := tp.Meta[key]
	return value, ok
}
14 | 15 | package utils 16 | 17 | import ( 18 | "fmt" 19 | ) 20 | 21 | var DefaultTransformerRegistry = NewTransformerRegistry() 22 | 23 | type TransformerRegistry struct { 24 | M map[string]*TransformerDefinition 25 | } 26 | 27 | func NewTransformerRegistry() *TransformerRegistry { 28 | return &TransformerRegistry{ 29 | M: make(map[string]*TransformerDefinition), 30 | } 31 | } 32 | 33 | func (tm *TransformerRegistry) Register(definition *TransformerDefinition) error { 34 | if _, ok := tm.M[definition.Properties.Name]; ok { 35 | return fmt.Errorf("unable to register transformer: transformer with Name %s already exists", 36 | definition.Properties.Name) 37 | } 38 | tm.M[definition.Properties.Name] = definition 39 | return nil 40 | } 41 | 42 | func (tm *TransformerRegistry) MustRegister(definition *TransformerDefinition) { 43 | if err := tm.Register(definition); err != nil { 44 | panic(err.Error()) 45 | } 46 | } 47 | 48 | func (tm *TransformerRegistry) Get(name string) (*TransformerDefinition, bool) { 49 | t, ok := tm.M[name] 50 | return t, ok 51 | } 52 | -------------------------------------------------------------------------------- /internal/db/postgres/transformers/utils/transformer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "context" 19 | 20 | "github.com/greenmaskio/greenmask/pkg/toolkit" 21 | ) 22 | 23 | type Transformer interface { 24 | Init(ctx context.Context) error 25 | Done(ctx context.Context) error 26 | Transform(ctx context.Context, r *toolkit.Record) (*toolkit.Record, error) 27 | GetAffectedColumns() map[int]string 28 | } 29 | -------------------------------------------------------------------------------- /internal/db/postgres/utils/connector.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/jackc/pgx/v5" 8 | "github.com/rs/zerolog/log" 9 | ) 10 | 11 | type PGConnector interface { 12 | WithTx(ctx context.Context, fn func(ctx context.Context, tx pgx.Tx) error) error 13 | GetConn() *pgx.Conn 14 | } 15 | 16 | // PGConn is a wrapper around pgx.Conn that allows to wrap logic in transactions 17 | type PGConn struct { 18 | con *pgx.Conn 19 | } 20 | 21 | func NewPGConn(con *pgx.Conn) *PGConn { 22 | return &PGConn{ 23 | con: con, 24 | } 25 | } 26 | 27 | func (p *PGConn) GetConn() *pgx.Conn { 28 | return p.con 29 | } 30 | 31 | func (p *PGConn) WithTx(ctx context.Context, fn func(ctx context.Context, tx pgx.Tx) error) error { 32 | tx, err := p.con.Begin(ctx) 33 | if err != nil { 34 | return fmt.Errorf("cannot start transaction: %w", err) 35 | } 36 | if err := fn(ctx, tx); err != nil { 37 | if txErr := tx.Rollback(ctx); txErr != nil { 38 | log.Warn(). 39 | Err(txErr). 
40 | Msg("cannot rollback transaction") 41 | } 42 | return err 43 | } 44 | return tx.Commit(ctx) 45 | } 46 | -------------------------------------------------------------------------------- /internal/db/postgres/utils/connector_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "testing" 7 | 8 | "github.com/jackc/pgx/v5" 9 | "github.com/stretchr/testify/suite" 10 | 11 | "github.com/greenmaskio/greenmask/internal/utils/testutils" 12 | ) 13 | 14 | type connectorSuite struct { 15 | testutils.PgContainerSuite 16 | } 17 | 18 | func TestRestorers(t *testing.T) { 19 | suite.Run(t, new(connectorSuite)) 20 | } 21 | 22 | func (s *connectorSuite) Test_connectorSuite_WithTx() { 23 | ctx := context.Background() 24 | conn, err := s.PgContainerSuite.GetConnection(ctx) 25 | s.Require().NoError(err) 26 | pgConn := NewPGConn(conn) 27 | s.Run("check commit", func() { 28 | err := pgConn.WithTx(ctx, func(ctx context.Context, tx pgx.Tx) error { 29 | _, err := tx.Exec(ctx, "CREATE TABLE _test_table_commit (id SERIAL PRIMARY KEY, name TEXT)") 30 | return err 31 | }) 32 | s.Require().NoError(err) 33 | 34 | var relOid uint32 35 | err = conn.QueryRow(ctx, "SELECT oid FROM pg_catalog.pg_class WHERE relname = '_test_table_commit'"). 36 | Scan(&relOid) 37 | s.Require().NoError(err) 38 | s.Require().NotZero(relOid) 39 | }) 40 | 41 | s.Run("check error", func() { 42 | err := pgConn.WithTx(ctx, func(ctx context.Context, tx pgx.Tx) error { 43 | _, err := tx.Exec(ctx, "CREATE TABLE _test_table_rollback (id SERIAL PRIMARY KEY, name TEXT)") 44 | s.Require().NoError(err) 45 | return errors.New("some error") 46 | }) 47 | s.Require().Error(err) 48 | 49 | var relOid uint32 50 | err = conn.QueryRow(ctx, "SELECT oid FROM pg_catalog.pg_class WHERE relname = '_test_table_rollback'"). 
// Generator produces a byte sequence from input bytes.
//
// We don't know the byte length in the output, min value, max value
type Generator interface {
	// Generate returns the bytes produced for the given input, or an error.
	Generate([]byte) ([]byte, error)
	// Size reports the number of bytes the generator can produce; consumers
	// compare it with the byte length they require.
	Size() int
}
Sha1Name = "sha1" 15 | Sha256Name = "sha256" 16 | Sha512Name = "sha512" 17 | Sha3224 = "sha3-224" 18 | Sha3256 = "sha3-256" 19 | Sha3384 = "sha3-384" 20 | Sha3512 = "sha3-512" 21 | ) 22 | 23 | type Hash struct { 24 | hash.Hash 25 | salt []byte 26 | buf []byte 27 | } 28 | 29 | func NewHash(salt []byte, funcName string) (Generator, error) { 30 | 31 | var h hash.Hash 32 | 33 | switch funcName { 34 | case Sha1Name: 35 | h = sha1.New() 36 | case Sha256Name: 37 | h = sha256.New() 38 | case Sha512Name: 39 | h = sha512.New() 40 | case Sha3224: 41 | h = sha3.New224() 42 | case Sha3256: 43 | h = sha3.New256() 44 | case Sha3384: 45 | h = sha3.New384() 46 | case Sha3512: 47 | h = sha3.New512() 48 | default: 49 | return nil, fmt.Errorf("unknow hash function name \"%s\"", funcName) 50 | } 51 | 52 | size := h.Size() 53 | 54 | return &Hash{ 55 | Hash: h, 56 | buf: make([]byte, size), 57 | salt: salt, 58 | }, nil 59 | } 60 | 61 | func (s *Hash) Generate(data []byte) ([]byte, error) { 62 | defer s.Reset() 63 | _, err := s.Write(s.salt) 64 | if err != nil { 65 | return nil, fmt.Errorf("unable to write salt into writer: %w", err) 66 | } 67 | _, err = s.Write(data) 68 | if err != nil { 69 | return nil, fmt.Errorf("unable to write data into writer: %w", err) 70 | } 71 | 72 | s.buf = s.buf[:0] 73 | return s.Sum(s.buf), nil 74 | } 75 | -------------------------------------------------------------------------------- /internal/generators/hash_reducer.go: -------------------------------------------------------------------------------- 1 | package generators 2 | 3 | type HashReducer struct { 4 | g Generator 5 | size int 6 | } 7 | 8 | func NewHashReducer(g Generator, size int) Generator { 9 | return &HashReducer{ 10 | g: g, 11 | size: size, 12 | } 13 | } 14 | 15 | func (hr *HashReducer) Generate(data []byte) (res []byte, err error) { 16 | res, err = hr.g.Generate(data) 17 | if err != nil { 18 | return nil, err 19 | } 20 | 21 | return res[:hr.size], nil 22 | } 23 | 24 | func (hr *HashReducer) 
Size() int { 25 | return hr.size 26 | } 27 | -------------------------------------------------------------------------------- /internal/generators/hybrid.go: -------------------------------------------------------------------------------- 1 | package generators 2 | 3 | import ( 4 | "encoding/binary" 5 | "math/rand" 6 | ) 7 | 8 | type HybridBytes struct { 9 | r *rand.Rand 10 | g Generator 11 | size int 12 | randomIters int 13 | resBuf []byte 14 | randomBuf []byte 15 | } 16 | 17 | func NewHybridBytes(seed int64, requestedSize int, h Generator) *HybridBytes { 18 | genSize := h.Size() 19 | if genSize < 8 { 20 | panic("generator size must be at least 8 bytes") 21 | } 22 | 23 | var requiredRandomSize int 24 | if genSize < requestedSize { 25 | requiredRandomSize = requestedSize - genSize 26 | } 27 | 28 | randomIters := requiredRandomSize / 8 29 | if requiredRandomSize%8 > 0 { 30 | randomIters += 1 31 | } 32 | return &HybridBytes{ 33 | r: rand.New(rand.NewSource(seed)), 34 | g: h, 35 | size: requestedSize, 36 | randomIters: randomIters, 37 | resBuf: make([]byte, requestedSize), 38 | randomBuf: make([]byte, 8), 39 | } 40 | } 41 | 42 | func (hb *HybridBytes) Generate(data []byte) ([]byte, error) { 43 | hb.resBuf = hb.resBuf[:0] 44 | res, err := hb.g.Generate(data) 45 | if err != nil { 46 | return nil, err 47 | } 48 | hb.resBuf = append(hb.resBuf, res...) 49 | seed := int64(binary.LittleEndian.Uint64(res[len(res)-8:])) 50 | hb.r.Seed(seed) 51 | for i := 0; i < hb.randomIters; i++ { 52 | binary.LittleEndian.PutUint64(hb.randomBuf, hb.r.Uint64()) 53 | hb.resBuf = append(hb.resBuf, hb.randomBuf...) 
54 | } 55 | return hb.resBuf[:hb.size], nil 56 | } 57 | -------------------------------------------------------------------------------- /internal/generators/hybrid_test.go: -------------------------------------------------------------------------------- 1 | package generators 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/rs/zerolog/log" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestSibHashHybrid(t *testing.T) { 11 | expected := []byte{176, 20, 124, 157, 15, 119, 202, 213, 41, 32} 12 | requiredLength := 10 13 | sp, err := NewSipHash([]byte("test")) 14 | require.NoError(t, err) 15 | hb := NewHybridBytes(0, requiredLength, sp) 16 | res, err := hb.Generate([]byte("test")) 17 | log.Debug(). 18 | Bytes("Res", res). 19 | Msg("") 20 | require.NoError(t, err) 21 | require.Equal(t, res, expected) 22 | } 23 | -------------------------------------------------------------------------------- /internal/generators/murmur.go: -------------------------------------------------------------------------------- 1 | package generators 2 | 3 | import ( 4 | "fmt" 5 | "hash" 6 | 7 | "github.com/spaolacci/murmur3" 8 | ) 9 | 10 | const ( 11 | MurMurHash32Size = 4 12 | MurMurHash64Size = 8 13 | MurMurHash128Size = 16 14 | ) 15 | 16 | type MurmurHash struct { 17 | hash.Hash 18 | size int 19 | } 20 | 21 | func NewMurmurHash(seed uint32, size int) *MurmurHash { 22 | var h hash.Hash 23 | switch size { 24 | case MurMurHash32Size: 25 | h = murmur3.New32WithSeed(seed) 26 | case MurMurHash64Size: 27 | h = murmur3.New64WithSeed(seed) 28 | case MurMurHash128Size: 29 | h = murmur3.New128WithSeed(seed) 30 | default: 31 | panic(fmt.Sprintf("unknown size for hash %d", size)) 32 | } 33 | return &MurmurHash{ 34 | Hash: h, 35 | size: size, 36 | } 37 | } 38 | 39 | func (mh *MurmurHash) Size() int { 40 | return mh.size 41 | } 42 | 43 | func (mh *MurmurHash) Generate(data []byte) ([]byte, error) { 44 | return mh.Sum(data), nil 45 | } 46 | 
-------------------------------------------------------------------------------- /internal/generators/projector.go: -------------------------------------------------------------------------------- 1 | package generators 2 | 3 | import "fmt" 4 | 5 | type Projector struct { 6 | generators []Generator 7 | } 8 | 9 | func NewProjector(generators ...Generator) *Projector { 10 | return &Projector{ 11 | generators: generators, 12 | } 13 | } 14 | 15 | func (p *Projector) Generate(data []byte) (res []byte, err error) { 16 | res = data 17 | for idx, g := range p.generators { 18 | res, err = g.Generate(res) 19 | if err != nil { 20 | return nil, fmt.Errorf("error generating data using %d genrator: %w", idx, err) 21 | } 22 | } 23 | return res, nil 24 | } 25 | 26 | func (p *Projector) Size() int { 27 | return p.generators[len(p.generators)-1].Size() 28 | } 29 | -------------------------------------------------------------------------------- /internal/generators/random_bytes.go: -------------------------------------------------------------------------------- 1 | package generators 2 | 3 | import ( 4 | "encoding/binary" 5 | "math/rand" 6 | ) 7 | 8 | type RandomBytes struct { 9 | r *rand.Rand 10 | size int 11 | iters int 12 | } 13 | 14 | func NewRandomBytes(seed int64, size int) *RandomBytes { 15 | iters := size / 8 16 | if size%8 > 0 { 17 | iters += 1 18 | } 19 | return &RandomBytes{ 20 | r: rand.New(rand.NewSource(seed)), 21 | size: size, 22 | iters: iters, 23 | } 24 | } 25 | 26 | func (br *RandomBytes) Generate(data []byte) ([]byte, error) { 27 | res := make([]byte, 0, br.size) 28 | buf := make([]byte, 8) 29 | for i := 0; i < br.iters; i++ { 30 | binary.LittleEndian.PutUint64(buf, br.r.Uint64()) 31 | res = append(res, buf...) 
// SipHash is a generator backed by a keyed siphash instance. It embeds the
// hash, keeps the derived key in salt and reuses buf for the digest.
type SipHash struct {
	hash.Hash
	salt []byte
	buf  []byte
	size int
}

// NewSipHash derives a 16-byte key from salt and returns a generator built
// on siphash.New with that key. The returned error is always nil.
func NewSipHash(salt []byte) (Generator, error) {

	// NOTE(review): hash.Hash.Sum(b) appends the digest of the data written so
	// far to b; nothing has been written to the SHA3-224 hash here. The key is
	// therefore salt followed by the digest of empty input, cut to 16 bytes -
	// the salt itself is not hashed, and bytes of salt past the 16th are
	// ignored. Changing this would change every generated value; confirm
	// whether the current behavior is intended before touching it.
	salt = sha3.New224().Sum(salt)[:16]

	h := siphash.New(salt)

	return &SipHash{
		Hash: h,
		buf:  make([]byte, 8),
		salt: salt,
		size: 8,
	}, nil
}

// Generate writes data into the hash and returns the resulting digest. The
// hash is reset when the call returns, so calls do not affect each other.
func (s *SipHash) Generate(data []byte) ([]byte, error) {
	defer s.Reset()

	if _, err := s.Write(data); err != nil {
		return nil, fmt.Errorf("unable to write data into writer: %w", err)
	}

	// The digest is appended to the emptied s.buf, so the returned slice is
	// built on the receiver's buffer; presumably callers must copy it if they
	// keep it across calls - TODO confirm.
	s.buf = s.buf[:0]
	return s.Sum(s.buf), nil
}
//fmt.Printf("%d", time.Now().Unix()) 15 | original := time.Unix(1712668244, 0) 16 | minRatio := 10 * (time.Hour * 24) // 10 days 17 | maxRatio := 90 * (time.Hour * 24) // 90 days 18 | 19 | expectedMinValue := original.Add(-80 * (time.Hour * 24)) // now - 10 days 20 | expectedMaxValue := original.Add(+80 * (time.Hour * 24)) // now + 10 days 21 | 22 | l, err := NewNoiseTimestampLimiter(&expectedMinValue, &expectedMaxValue) 23 | require.NoError(t, err) 24 | 25 | tr, err := NewNoiseTimestamp(minRatio, maxRatio, "", l) 26 | require.NoError(t, err) 27 | g := generators.NewRandomBytes(time.Now().UnixNano(), tr.GetRequiredGeneratorByteLength()) 28 | require.NoError(t, err) 29 | err = tr.SetGenerator(g) 30 | require.NoError(t, err) 31 | res, err := tr.Transform(nil, original) 32 | require.NoError(t, err) 33 | log.Debug(). 34 | Time("original", original). 35 | Time("transformed", res). 36 | Dur("minRatio", minRatio). 37 | Dur("maxRatio", maxRatio). 38 | Time("minExpected", expectedMinValue). 39 | Time("maxExpected", expectedMaxValue). 
40 | Msg("") 41 | require.True(t, res.After(expectedMinValue.Add(-1)) && res.Before(expectedMaxValue.Add(1))) 42 | } 43 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_boolean.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/greenmaskio/greenmask/internal/generators" 7 | ) 8 | 9 | type RandomBoolean struct { 10 | generator generators.Generator 11 | byteLength int 12 | } 13 | 14 | func NewRandomBoolean() *RandomBoolean { 15 | return &RandomBoolean{ 16 | byteLength: 1, 17 | } 18 | } 19 | 20 | func (b *RandomBoolean) GetRequiredGeneratorByteLength() int { 21 | return b.byteLength 22 | } 23 | 24 | func (b *RandomBoolean) SetGenerator(g generators.Generator) error { 25 | if g.Size() < b.byteLength { 26 | return fmt.Errorf("requested byte length (%d) higher than generator can produce (%d)", b.byteLength, g.Size()) 27 | } 28 | b.generator = g 29 | return nil 30 | } 31 | 32 | func (b *RandomBoolean) Transform(original []byte) (bool, error) { 33 | resBytes, err := b.generator.Generate(original) 34 | if err != nil { 35 | return false, err 36 | } 37 | return resBytes[0]%2 == 0, nil 38 | } 39 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_choice.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | 7 | "github.com/greenmaskio/greenmask/internal/generators" 8 | "github.com/greenmaskio/greenmask/pkg/toolkit" 9 | ) 10 | 11 | type RandomChoiceTransformer struct { 12 | values []*toolkit.RawValue 13 | byteLength int 14 | generator generators.Generator 15 | } 16 | 17 | func NewRandomChoiceTransformer(values []*toolkit.RawValue) *RandomChoiceTransformer { 18 | return &RandomChoiceTransformer{ 19 | values: values, 20 | 
byteLength: 4, 21 | } 22 | } 23 | 24 | func (rc *RandomChoiceTransformer) Transform(original []byte) (*toolkit.RawValue, error) { 25 | resBytes, err := rc.generator.Generate(original) 26 | if err != nil { 27 | return nil, err 28 | } 29 | idx := int(binary.LittleEndian.Uint32(resBytes)) % len(rc.values) 30 | return rc.values[idx], nil 31 | } 32 | 33 | func (rc *RandomChoiceTransformer) GetRequiredGeneratorByteLength() int { 34 | return rc.byteLength 35 | } 36 | 37 | func (rc *RandomChoiceTransformer) SetGenerator(g generators.Generator) error { 38 | if g.Size() < rc.byteLength { 39 | return fmt.Errorf("requested byte length (%d) higher than generator can produce (%d)", rc.byteLength, g.Size()) 40 | } 41 | rc.generator = g 42 | return nil 43 | } 44 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_choice_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/greenmaskio/greenmask/internal/generators" 7 | "github.com/greenmaskio/greenmask/pkg/toolkit" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestChoiceTransformer_Transform(t *testing.T) { 12 | data := []*toolkit.RawValue{ 13 | {Data: []byte("a")}, 14 | {Data: []byte("b")}, 15 | } 16 | tr := NewRandomChoiceTransformer(data) 17 | g, err := generators.NewHash([]byte{}, "sha1") 18 | require.NoError(t, err) 19 | g = generators.NewHashReducer(g, tr.GetRequiredGeneratorByteLength()) 20 | err = tr.SetGenerator(g) 21 | require.NoError(t, err) 22 | res, err := tr.Transform([]byte{}) 23 | require.NoError(t, err) 24 | require.Contains(t, data, res) 25 | } 26 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_company_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | 
"slices" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/require" 9 | 10 | "github.com/greenmaskio/greenmask/internal/generators" 11 | ) 12 | 13 | func TestRandomCompanyNameTransformer_GetCompanyName(t *testing.T) { 14 | rnt := NewRandomCompanyTransformer(nil) 15 | g := generators.NewRandomBytes(time.Now().UnixNano(), rnt.GetRequiredGeneratorByteLength()) 16 | err := rnt.SetGenerator(g) 17 | require.NoError(t, err) 18 | res, err := rnt.GetCompanyName([]byte{}) 19 | require.NoError(t, err) 20 | require.True(t, slices.Contains(DefaultCompanyNames, res["CompanyName"])) 21 | } 22 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_float64_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/rs/zerolog/log" 7 | "github.com/stretchr/testify/require" 8 | 9 | "github.com/greenmaskio/greenmask/internal/generators" 10 | ) 11 | 12 | func TestNewFloat64Transformer(t *testing.T) { 13 | limiter, err := NewFloat64Limiter(-1, 1, 2) 14 | require.NoError(t, err) 15 | tr := NewRandomFloat64Transformer(limiter) 16 | g, err := generators.NewHash([]byte{}, "sha1") 17 | require.NoError(t, err) 18 | g = generators.NewHashReducer(g, tr.GetRequiredGeneratorByteLength()) 19 | err = tr.SetGenerator(g) 20 | require.NoError(t, err) 21 | res, err := tr.Transform(nil, []byte{}) 22 | require.NoError(t, err) 23 | log.Debug().Msgf("value = %f", res) 24 | require.True(t, res >= -1 && res <= 1) 25 | } 26 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_int64.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "github.com/greenmaskio/greenmask/internal/generators" 8 | ) 9 | 10 | var ( 11 | ErrWrongLimits = errors.New("wrong 
limits") 12 | ) 13 | 14 | type Int64Limiter struct { 15 | MinValue int64 16 | MaxValue int64 17 | distance uint64 18 | } 19 | 20 | func NewInt64Limiter(minValue, maxValue int64) (*Int64Limiter, error) { 21 | if minValue >= maxValue { 22 | return nil, ErrWrongLimits 23 | } 24 | 25 | return &Int64Limiter{ 26 | MinValue: minValue, 27 | MaxValue: maxValue, 28 | distance: uint64(maxValue - minValue), 29 | }, nil 30 | } 31 | 32 | func (l *Int64Limiter) Limit(v uint64) int64 { 33 | res := l.MinValue + int64(v%l.distance) 34 | if res < 0 { 35 | return res % l.MinValue 36 | } 37 | return res % l.MaxValue 38 | } 39 | 40 | type RandomInt64Transformer struct { 41 | generator generators.Generator 42 | limiter *Int64Limiter 43 | byteLength int 44 | } 45 | 46 | func NewRandomInt64Transformer(limiter *Int64Limiter, size int) (*RandomInt64Transformer, error) { 47 | return &RandomInt64Transformer{ 48 | limiter: limiter, 49 | byteLength: size, 50 | }, nil 51 | } 52 | 53 | func (ig *RandomInt64Transformer) Transform(l *Int64Limiter, original []byte) (int64, error) { 54 | var res int64 55 | limiter := ig.limiter 56 | if l != nil { 57 | limiter = l 58 | } 59 | 60 | resBytes, err := ig.generator.Generate(original) 61 | if err != nil { 62 | return 0, err 63 | } 64 | 65 | if limiter != nil { 66 | res = limiter.Limit(generators.BuildUint64FromBytes(resBytes)) 67 | } else { 68 | res = generators.BuildInt64FromBytes(resBytes) 69 | } 70 | 71 | return res, nil 72 | } 73 | 74 | func (ig *RandomInt64Transformer) GetRequiredGeneratorByteLength() int { 75 | return ig.byteLength 76 | } 77 | 78 | func (ig *RandomInt64Transformer) SetGenerator(g generators.Generator) error { 79 | if g.Size() < ig.byteLength { 80 | return fmt.Errorf("requested byte length (%d) higher than generator can produce (%d)", ig.byteLength, g.Size()) 81 | } 82 | ig.generator = g 83 | return nil 84 | } 85 | -------------------------------------------------------------------------------- 
/internal/generators/transformers/random_int64_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | 7 | "github.com/rs/zerolog/log" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestLimiter_Limit(t *testing.T) { 12 | minValue := int64(math.MinInt64) 13 | maxValue := int64(math.MaxInt64) 14 | l, err := NewInt64Limiter(minValue, maxValue) 15 | require.NoError(t, err) 16 | res := l.Limit(uint64(math.MaxUint64 - 1)) 17 | require.True(t, res == math.MaxInt64-1) 18 | } 19 | 20 | func TestLimiter_negative_Limit(t *testing.T) { 21 | minValue := int64(-10000) 22 | maxValue := int64(-1) 23 | l, err := NewInt64Limiter(minValue, maxValue) 24 | require.NoError(t, err) 25 | res := l.Limit(100000000) 26 | log.Debug().Int64("res", res).Msg("") 27 | require.True(t, res == -9999) 28 | } 29 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_numeric_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/shopspring/decimal" 7 | "github.com/stretchr/testify/require" 8 | 9 | "github.com/greenmaskio/greenmask/internal/generators" 10 | ) 11 | 12 | func TestBigIntTransformer_Transform(t *testing.T) { 13 | sha1, err := generators.NewHash([]byte{1, 2, 3, 4}, "sha1") 14 | require.NoError(t, err) 15 | minValue, err := decimal.NewFromString("-999999999999999999999999999999999999999") 16 | require.NoError(t, err) 17 | maxValue, err := decimal.NewFromString("999999999999999999999999999999999999999") 18 | require.NoError(t, err) 19 | limiter, err := NewRandomNumericLimiter(minValue, maxValue) 20 | require.NoError(t, err) 21 | tr, err := NewRandomNumericTransformer(limiter, 0) 22 | require.NoError(t, err) 23 | err = tr.SetGenerator(sha1) 24 | require.NoError(t, err) 25 | res, err := 
tr.Transform([]byte("199999999999999999999999999999999999999")) 26 | require.NoError(t, err) 27 | require.True(t, res.LessThanOrEqual(maxValue) && res.GreaterThanOrEqual(minValue)) 28 | } 29 | 30 | func TestBigFloatTransformer_Transform(t *testing.T) { 31 | sha1, err := generators.NewHash([]byte{1, 2, 3, 4}, "sha1") 32 | require.NoError(t, err) 33 | minValue, err := decimal.NewFromString("-1") 34 | require.NoError(t, err) 35 | maxValue, err := decimal.NewFromString("999999999999999999999999999999999999999.12345") 36 | require.NoError(t, err) 37 | limiter, err := NewRandomNumericLimiter(minValue, maxValue) 38 | require.NoError(t, err) 39 | //limiter.SetPrecision(3) 40 | tr, err := NewRandomNumericTransformer(limiter, 3) 41 | require.NoError(t, err) 42 | err = tr.SetGenerator(sha1) 43 | require.NoError(t, err) 44 | res, err := tr.Transform([]byte("1999999999999999999999999999999999999990")) 45 | require.NoError(t, err) 46 | require.True(t, res.LessThanOrEqual(maxValue) && res.GreaterThanOrEqual(minValue)) 47 | } 48 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_person_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "slices" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/require" 9 | 10 | "github.com/greenmaskio/greenmask/internal/generators" 11 | ) 12 | 13 | func TestRandomNameTransformer_GetFullName(t *testing.T) { 14 | rnt := NewRandomPersonTransformer(AnyGenderName, nil) 15 | g := generators.NewRandomBytes(time.Now().UnixNano(), rnt.GetRequiredGeneratorByteLength()) 16 | err := rnt.SetGenerator(g) 17 | require.NoError(t, err) 18 | res, err := rnt.GetFullName("", []byte{}) 19 | require.NoError(t, err) 20 | require.True(t, slices.Contains(DefaultFirstNamesMale, res["FirstName"]) || slices.Contains(DefaultFirstNamesFemale, res["FirstName"])) 21 | require.True(t, 
slices.Contains(DefaultLastNames, res["LastName"])) 22 | } 23 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_string_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/greenmaskio/greenmask/internal/generators" 7 | "github.com/rs/zerolog/log" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestStringTransformer_Transform_hash(t *testing.T) { 12 | st, err := NewRandomStringTransformer([]rune("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\\-~"), 10, 100) 13 | require.NoError(t, err) 14 | 15 | hashFuncName, _, err := generators.GetHashFunctionNameBySize(st.GetRequiredGeneratorByteLength()) 16 | require.NoError(t, err) 17 | g, err := generators.NewHash([]byte{}, hashFuncName) 18 | require.NoError(t, err) 19 | err = st.SetGenerator(g) 20 | require.NoError(t, err) 21 | res := st.Transform([]byte{}) 22 | log.Debug().Str("value", string(res)).Msg("") 23 | require.True(t, len(res) >= 10 && len(res) <= 100) 24 | require.Equal(t, "-bM6BQ6~uJ", string(res)) 25 | } 26 | 27 | func TestStringTransformer_Transform_random(t *testing.T) { 28 | st, err := NewRandomStringTransformer([]rune("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\\-~"), 10, 100) 29 | require.NoError(t, err) 30 | 31 | g := generators.NewRandomBytes(0, st.GetRequiredGeneratorByteLength()) 32 | err = st.SetGenerator(g) 33 | require.NoError(t, err) 34 | res := st.Transform([]byte{}) 35 | log.Debug().Str("value", string(res)).Msg("") 36 | require.True(t, len(res) >= 10 && len(res) <= 100) 37 | require.Equal(t, "xvz16-K2SEYfw~rMwctQfflfq3rAHtLyyYNppFhYXrNyw027~L3TFZgxAsNxduRggmgr4sBIuMzzZOqqGiZYsOzx138AM4UGahy", string(res)) 38 | } 39 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_timestamp_test.go: 
-------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/rs/zerolog/log" 8 | "github.com/stretchr/testify/require" 9 | 10 | "github.com/greenmaskio/greenmask/internal/generators" 11 | ) 12 | 13 | func TestTimestampLimiter_Limit_positive_distance(t *testing.T) { 14 | minDate := time.Unix(1646812104, 0) 15 | maxDate := time.Unix(1709970504, 0) 16 | l, err := NewTimestampLimiter(minDate, maxDate) 17 | require.NoError(t, err) 18 | sec, _ := l.Limit(1246474821, 100) 19 | require.True(t, sec >= minDate.Unix() && sec <= maxDate.Unix()) 20 | } 21 | 22 | func TestTimestampLimiter_Limit_negative_positive_distance(t *testing.T) { 23 | minDate := time.Unix(-783101496, 0) 24 | maxDate := time.Unix(1709970504, 0) 25 | l, err := NewTimestampLimiter(minDate, maxDate) 26 | require.NoError(t, err) 27 | sec, _ := l.Limit(1246474821121, 100) 28 | require.True(t, sec >= minDate.Unix() && sec <= maxDate.Unix()) 29 | } 30 | 31 | func TestTimestampLimiter_Limit_negative_negative_distance(t *testing.T) { 32 | minDate := time.Unix(-2203172704, 0) 33 | maxDate := time.Unix(-783101496, 0) 34 | l, err := NewTimestampLimiter(minDate, maxDate) 35 | require.NoError(t, err) 36 | sec, _ := l.Limit(1246474821121, 100) 37 | require.True(t, sec >= minDate.Unix() && sec <= maxDate.Unix()) 38 | } 39 | 40 | func TestTimestamp_Transform(t *testing.T) { 41 | minDate := time.Unix(-2203172704, 0) 42 | maxDate := time.Unix(-783101496, 0) 43 | l, err := NewTimestampLimiter(minDate, maxDate) 44 | require.NoError(t, err) 45 | gen := generators.NewRandomBytes(0, 16) 46 | tr, err := NewRandomTimestamp("", l) 47 | require.NoError(t, err) 48 | err = tr.SetGenerator(gen) 49 | require.NoError(t, err) 50 | res, err := tr.Transform(nil, []byte{}) 51 | require.NoError(t, err) 52 | log.Debug(). 53 | Str("minDate", minDate.String()). 54 | Str("maxDate", maxDate.String()). 55 | Str("result", res.String()). 
56 | Msg("") 57 | require.True(t, res.After(minDate) && res.Before(maxDate)) 58 | } 59 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_uuid.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/google/uuid" 7 | "github.com/greenmaskio/greenmask/internal/generators" 8 | ) 9 | 10 | const uuidTransformerRequiredLength = 16 11 | 12 | type RandomUuidTransformer struct { 13 | byteLength int 14 | generator generators.Generator 15 | } 16 | 17 | func NewRandomUuidTransformer() *RandomUuidTransformer { 18 | return &RandomUuidTransformer{ 19 | byteLength: uuidTransformerRequiredLength, 20 | } 21 | } 22 | 23 | func (ut *RandomUuidTransformer) Transform(data []byte) (uuid.UUID, error) { 24 | resBytes, err := ut.generator.Generate(data) 25 | if err != nil { 26 | return uuid.UUID{}, fmt.Errorf("failed to generate random bytes: %w", err) 27 | } 28 | return uuid.FromBytes(resBytes) 29 | } 30 | 31 | func (ut *RandomUuidTransformer) GetRequiredGeneratorByteLength() int { 32 | return ut.byteLength 33 | } 34 | 35 | func (ut *RandomUuidTransformer) SetGenerator(g generators.Generator) error { 36 | if g.Size() < ut.byteLength { 37 | return fmt.Errorf("requested byte length (%d) higher than generator can produce (%d)", ut.byteLength, g.Size()) 38 | } 39 | ut.generator = g 40 | return nil 41 | } 42 | -------------------------------------------------------------------------------- /internal/generators/transformers/random_uuid_test.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/greenmaskio/greenmask/internal/generators" 7 | "github.com/rs/zerolog/log" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestUuidTransformer_Transform_hash(t *testing.T) { 12 | regexp := 
// BuildBytesFromInt64 encodes value as 8 little-endian bytes, using the
// two's-complement bit pattern of the integer.
func BuildBytesFromInt64(value int64) []byte {
	var encoded [8]byte
	binary.LittleEndian.PutUint64(encoded[:], uint64(value))
	return encoded[:]
}
// BuildInt64FromBytes decodes data as a little-endian int64.
//
// Only the first 8 bytes are used. When data holds fewer than 8 bytes, the
// missing high-order bytes are treated as zero.
func BuildInt64FromBytes(data []byte) (res int64) {
	return int64(BuildUint64FromBytes(data))
}

// BuildBytesFromUint64 encodes value as 8 little-endian bytes.
func BuildBytesFromUint64(value uint64) []byte {
	res := make([]byte, 8)
	binary.LittleEndian.PutUint64(res, value)
	return res
}

// BuildUint64FromBytes decodes data as a little-endian uint64.
//
// Only the first 8 bytes are used. When data holds fewer than 8 bytes
// (including nil), the missing high-order bytes are treated as zero. Short
// input is copied by length rather than resliced to 8: reslicing would panic
// when the capacity is below 8 and would otherwise read bytes beyond len(data).
func BuildUint64FromBytes(data []byte) (res uint64) {
	if len(data) >= 8 {
		return binary.LittleEndian.Uint64(data)
	}

	var padded [8]byte
	copy(padded[:], data)
	return binary.LittleEndian.Uint64(padded[:])
}
BuildBytesFromInt64(value)[:3] 12 | res := BuildInt64FromBytes(intBytes) 13 | require.Equal(t, value, res) 14 | } 15 | -------------------------------------------------------------------------------- /internal/storages/builder/builder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package builder 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | 21 | "github.com/greenmaskio/greenmask/internal/domains" 22 | "github.com/greenmaskio/greenmask/internal/storages" 23 | "github.com/greenmaskio/greenmask/internal/storages/directory" 24 | "github.com/greenmaskio/greenmask/internal/storages/s3" 25 | ) 26 | 27 | const ( 28 | DirectoryStorageType = "directory" 29 | S3StorageType = "s3" 30 | ) 31 | 32 | func GetStorage(ctx context.Context, stCfg *domains.StorageConfig, logCgf *domains.LogConfig) ( 33 | storages.Storager, error, 34 | ) { 35 | 36 | switch stCfg.Type { 37 | case DirectoryStorageType: 38 | if err := stCfg.Directory.Validate(); err != nil { 39 | return nil, fmt.Errorf("directory storage config validation failed: %w", err) 40 | } 41 | return directory.NewStorage(stCfg.Directory) 42 | case S3StorageType: 43 | return s3.NewStorage(ctx, stCfg.S3, logCgf.Level) 44 | } 45 | return nil, fmt.Errorf("unknown storage type: %s", stCfg.Type) 46 | } 47 | 
var (
	// ErrPathIsRequired is returned by Validate when no path is configured.
	ErrPathIsRequired = errors.New("path is required")
	// ErrPathIsNotDirectory is returned by Validate when the configured path
	// exists but does not refer to a directory.
	ErrPathIsNotDirectory = errors.New("path is not a directory")
)

// Config holds the settings of the directory storage.
type Config struct {
	// Path is the location of the storage root on the local file system.
	Path string `mapstructure:"path"`
}

// NewConfig returns an empty configuration.
func NewConfig() *Config {
	return &Config{}
}

// Validate checks that Path is set and refers to an existing directory.
//
// Errors from os.Stat are returned unchanged. A path that exists but is not a
// directory (for example a regular file) is rejected with
// ErrPathIsNotDirectory, so the misconfiguration is reported here rather than
// on first use of the storage.
func (d *Config) Validate() error {
	if d.Path == "" {
		return ErrPathIsRequired
	}
	info, err := os.Stat(d.Path)
	if err != nil {
		return err
	}
	if !info.IsDir() {
		return ErrPathIsNotDirectory
	}
	return nil
}
| suite.tmpDir, err = os.MkdirTemp(tempDir, "directory_storage_unit_test_") 27 | suite.Require().NoError(err) 28 | 29 | suite.st, err = NewStorage(&Config{Path: suite.tmpDir}) 30 | suite.Require().NoError(err) 31 | } 32 | 33 | func (suite *DirectorySuite) TestPutObject() { 34 | buf := bytes.NewBuffer(nil) 35 | buf.Write([]byte("test")) 36 | 37 | err := suite.st.PutObject(context.Background(), "1/2/3/test.txt", buf) 38 | suite.Require().NoError(err) 39 | } 40 | 41 | func (suite *DirectorySuite) TearDownSuite() { 42 | if err := os.RemoveAll(suite.tmpDir); err != nil { 43 | log.Warn().Err(err).Msg("error deleting tmp dir") 44 | } 45 | } 46 | 47 | func TestDirectoryStorage(t *testing.T) { 48 | suite.Run(t, new(DirectorySuite)) 49 | } 50 | -------------------------------------------------------------------------------- /internal/storages/domains/domains.go: -------------------------------------------------------------------------------- 1 | package domains 2 | 3 | import "time" 4 | 5 | type ObjectStat struct { 6 | Name string 7 | LastModified time.Time 8 | Exist bool 9 | } 10 | -------------------------------------------------------------------------------- /internal/storages/s3/logger.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package s3 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/rs/zerolog" 21 | "github.com/rs/zerolog/log" 22 | ) 23 | 24 | type LogWrapper struct { 25 | logger *zerolog.Logger 26 | } 27 | 28 | func (lw LogWrapper) Log(objs ...interface{}) { 29 | event := log.Debug() 30 | for idx, o := range objs { 31 | event.Any(fmt.Sprintf("%d", idx), o) 32 | } 33 | event.Msg("s3 storage logging") 34 | } 35 | -------------------------------------------------------------------------------- /internal/storages/storager.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package storages 16 | 17 | import ( 18 | "context" 19 | "io" 20 | 21 | "github.com/greenmaskio/greenmask/internal/storages/domains" 22 | ) 23 | 24 | type Storager interface { 25 | // GetCwd - get current working directory (CWD) path 26 | GetCwd() string 27 | // Dirname - returns current dirname without prefix 28 | Dirname() string 29 | // ListDir - walking through storage and returns directories and files in the cwd 30 | ListDir(ctx context.Context) (files []string, dirs []Storager, err error) 31 | // GetObject - returns ReadCloser by the provided path 32 | GetObject(ctx context.Context, filePath string) (reader io.ReadCloser, err error) 33 | // PutObject - puts data to the provided file path 34 | PutObject(ctx context.Context, filePath string, body io.Reader) error 35 | // Delete - delete list of objects by the provided paths 36 | Delete(ctx context.Context, filePaths ...string) error 37 | // DeleteAll - delete all objects by the provided path prefix 38 | DeleteAll(ctx context.Context, pathPrefix string) error 39 | // Exists - check object existence 40 | Exists(ctx context.Context, fileName string) (bool, error) 41 | // SubStorage - get new Storage instance with the samo config but change current cwd via subPath 42 | // If relative == true then path is sub folder in cwd 43 | SubStorage(subPath string, relative bool) Storager 44 | // Stat - get the metadata info about object from the storage 45 | Stat(fileName string) (*domains.ObjectStat, error) 46 | } 47 | -------------------------------------------------------------------------------- /internal/storages/utils.go: -------------------------------------------------------------------------------- 1 | package storages 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "path" 7 | ) 8 | 9 | func Walk(ctx context.Context, st Storager, parent string) (res []string, err error) { 10 | var files []string 11 | files, dirs, err := st.ListDir(ctx) 12 | if err != nil { 13 | return nil, fmt.Errorf("error listing directory: 
%w", err) 14 | } 15 | for _, f := range files { 16 | res = append(res, path.Join(parent, f)) 17 | } 18 | if len(dirs) > 0 { 19 | for _, d := range dirs { 20 | subFiles, err := Walk(ctx, d, d.Dirname()) 21 | if err != nil { 22 | return nil, fmt.Errorf("error walking through directory: %w", err) 23 | } 24 | for _, f := range subFiles { 25 | res = append(res, path.Join(parent, f)) 26 | } 27 | } 28 | } 29 | 30 | return 31 | } 32 | -------------------------------------------------------------------------------- /internal/utils/config/mapstructure_hook.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// saltKey is the private context key type under which the salt is stored.
// Being unexported, it cannot collide with keys set by other packages.
type saltKey struct{}

// WithSalt returns a context derived from ctx that carries salt.
func WithSalt(ctx context.Context, salt []byte) context.Context {
	var key saltKey
	return context.WithValue(ctx, key, salt)
}

// SaltFromCtx returns the salt previously attached with WithSalt, or nil
// when ctx carries no salt.
func SaltFromCtx(ctx context.Context) []byte {
	if salt, ok := ctx.Value(saltKey{}).([]byte); ok {
		return salt
	}
	return nil
}
// CountReadCloser is an io.ReadCloser that also reports the number of bytes
// read through it so far.
type CountReadCloser interface {
	io.ReadCloser
	GetCount() int64
}

// Reader wraps an io.ReadCloser and accumulates the number of bytes read
// from it in Count.
type Reader struct {
	r     io.ReadCloser
	Count int64
}

// NewReader returns a Reader that counts the bytes read from r.
func NewReader(r io.ReadCloser) *Reader {
	cr := new(Reader)
	cr.r = r
	return cr
}

// Read reads from the wrapped reader and adds the number of bytes obtained
// to Count, including bytes returned together with an error.
func (cr *Reader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.Count += int64(n)
	return n, err
}

// Close closes the wrapped reader.
func (cr *Reader) Close() error {
	return cr.r.Close()
}

// GetCount returns the total number of bytes read so far.
func (cr *Reader) GetCount() int64 {
	return cr.Count
}
// CountWriteCloser is an io.WriteCloser that also reports the number of
// bytes written through it so far.
type CountWriteCloser interface {
	io.WriteCloser
	GetCount() int64
}

// Writer wraps an io.WriteCloser and accumulates the number of bytes written
// to it in Count.
type Writer struct {
	w     io.WriteCloser
	Count int64
}

// NewWriter returns a Writer that counts the bytes written to w.
func NewWriter(w io.WriteCloser) *Writer {
	wr := new(Writer)
	wr.w = w
	return wr
}

// Write forwards p to the wrapped writer and adds the number of bytes it
// accepted to Count, including bytes accepted before an error.
func (wr *Writer) Write(p []byte) (int, error) {
	written, err := wr.w.Write(p)
	wr.Count += int64(written)
	return written, err
}

// Close closes the wrapped writer.
func (wr *Writer) Close() error {
	return wr.w.Close()
}

// GetCount returns the total number of bytes written so far.
func (wr *Writer) GetCount() int64 {
	return wr.Count
}
21 | Msg("error closing dump file") 22 | } 23 | return nil, fmt.Errorf("cannot create gzip reader: %w", err) 24 | } 25 | 26 | return &GzipReader{ 27 | gz: gz, 28 | r: r, 29 | }, nil 30 | 31 | } 32 | 33 | func (r *GzipReader) Read(p []byte) (n int, err error) { 34 | return r.gz.Read(p) 35 | } 36 | 37 | func (r *GzipReader) Close() error { 38 | var lastErr error 39 | if err := r.gz.Close(); err != nil { 40 | lastErr = fmt.Errorf("error closing gzip reader: %w", err) 41 | log.Warn(). 42 | Err(err). 43 | Msg("error closing gzip reader") 44 | } 45 | if err := r.r.Close(); err != nil { 46 | lastErr = fmt.Errorf("error closing dump file: %w", err) 47 | log.Warn(). 48 | Err(err). 49 | Msg("error closing dump file") 50 | } 51 | return lastErr 52 | } 53 | -------------------------------------------------------------------------------- /internal/utils/ioutils/gzip_reader_test.go: -------------------------------------------------------------------------------- 1 | package ioutils 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | type readCloserMock struct { 12 | *bytes.Buffer 13 | closeCallCount int 14 | } 15 | 16 | func (r *readCloserMock) Close() error { 17 | r.closeCallCount++ 18 | return nil 19 | } 20 | 21 | func TestNewGzipReader_Read(t *testing.T) { 22 | data := `20383 24ca7574-0adb-4b17-8777-93f5589dbea2 2017-12-13 13:46:49.39 23 | 20384 d0d4a55c-7752-453e-8334-772a889fb917 2017-12-13 13:46:49.453 24 | 20385 ac8617aa-5a2d-4bb8-a9a5-ed879a4b33cd 2017-12-13 13:46:49.5 25 | ` 26 | buf := new(bytes.Buffer) 27 | gzData := gzip.NewWriter(buf) 28 | _, err := gzData.Write([]byte(data)) 29 | require.NoError(t, err) 30 | err = gzData.Flush() 31 | require.NoError(t, err) 32 | err = gzData.Close() 33 | require.NoError(t, err) 34 | objSrc := &readCloserMock{Buffer: buf} 35 | r, err := NewGzipReader(objSrc, false) 36 | require.NoError(t, err) 37 | readBuf := make([]byte, 1024) 38 | n, err := r.Read(readBuf) 39 
| require.NoError(t, err) 40 | require.Equal(t, []byte(data), readBuf[:n]) 41 | } 42 | 43 | func TestNewGzipReader_Close(t *testing.T) { 44 | data := "" 45 | buf := new(bytes.Buffer) 46 | gzData := gzip.NewWriter(buf) 47 | _, err := gzData.Write([]byte(data)) 48 | require.NoError(t, err) 49 | err = gzData.Flush() 50 | require.NoError(t, err) 51 | err = gzData.Close() 52 | require.NoError(t, err) 53 | objSrc := &readCloserMock{Buffer: buf, closeCallCount: 0} 54 | r, err := NewGzipReader(objSrc, false) 55 | require.NoError(t, err) 56 | err = r.Close() 57 | require.NoError(t, err) 58 | require.Equal(t, 1, objSrc.closeCallCount) 59 | gz := r.gz.(*gzip.Reader) 60 | _, err = gz.Read([]byte{}) 61 | require.Error(t, err) 62 | } 63 | -------------------------------------------------------------------------------- /internal/utils/ioutils/gzip_writer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ioutils 16 | 17 | import ( 18 | "compress/gzip" 19 | "fmt" 20 | "io" 21 | 22 | "github.com/klauspost/pgzip" 23 | "github.com/rs/zerolog/log" 24 | ) 25 | 26 | type WriteCloseFlusher interface { 27 | io.WriteCloser 28 | Flush() error 29 | } 30 | 31 | type GzipWriter struct { 32 | w io.WriteCloser 33 | gz WriteCloseFlusher 34 | } 35 | 36 | func NewGzipWriter(w io.WriteCloser, usePgzip bool) *GzipWriter { 37 | var gz WriteCloseFlusher 38 | if usePgzip { 39 | gz = pgzip.NewWriter(w) 40 | } else { 41 | gz = gzip.NewWriter(w) 42 | } 43 | return &GzipWriter{ 44 | w: w, 45 | gz: gz, 46 | } 47 | } 48 | 49 | func (gw *GzipWriter) Write(p []byte) (int, error) { 50 | return gw.gz.Write(p) 51 | } 52 | 53 | // Close - closing method with gz buffer flushing 54 | func (gw *GzipWriter) Close() error { 55 | var globalErr error 56 | if err := gw.gz.Flush(); err != nil { 57 | globalErr = fmt.Errorf("error flushing gzip buffer: %w", err) 58 | log.Warn().Err(err).Msg("error flushing gzip buffer") 59 | } 60 | if err := gw.gz.Close(); err != nil { 61 | globalErr = fmt.Errorf("error closing gzip writer: %w", err) 62 | log.Warn().Err(err).Msg("error closing gzip writer") 63 | } 64 | if err := gw.w.Close(); err != nil { 65 | globalErr = fmt.Errorf("error closing dump file: %w", err) 66 | log.Warn().Err(err).Msg("error closing dump file") 67 | } 68 | return globalErr 69 | } 70 | -------------------------------------------------------------------------------- /internal/utils/ioutils/pipe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package ioutils 16 | 17 | import ( 18 | "io" 19 | ) 20 | 21 | // NewGzipPipe - returns wrapped PipeWriter into (GzipWriter && Writer) and PipeReader into (Reader) 22 | func NewGzipPipe(usePgzip bool) (CountWriteCloser, CountReadCloser) { 23 | pr, pw := io.Pipe() 24 | // Wrapping writer pipe into count writer and gzip writer and reader pipe 25 | // into count reader 26 | return NewWriter(NewGzipWriter(pw, usePgzip)), NewReader(pr) 27 | } 28 | -------------------------------------------------------------------------------- /internal/utils/ioutils/utils.go: -------------------------------------------------------------------------------- 1 | package ioutils 2 | 3 | import ( 4 | "compress/gzip" 5 | "fmt" 6 | "io" 7 | 8 | "github.com/klauspost/pgzip" 9 | ) 10 | 11 | // GetGzipReadCloser - returns a gzip or pgzip reader 12 | func GetGzipReadCloser(r io.Reader, usePgzip bool) (gz io.ReadCloser, err error) { 13 | if usePgzip { 14 | gz, err = pgzip.NewReader(r) 15 | if err != nil { 16 | return nil, fmt.Errorf("cannot create pgzip reader: %w", err) 17 | } 18 | } else { 19 | gz, err = gzip.NewReader(r) 20 | if err != nil { 21 | return nil, fmt.Errorf("cannot create gzip reader: %w", err) 22 | } 23 | } 24 | return gz, nil 25 | } 26 | -------------------------------------------------------------------------------- /internal/utils/logger/logger.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 
(the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package logger 16 | 17 | import ( 18 | "fmt" 19 | "io" 20 | "os" 21 | "time" 22 | 23 | "github.com/rs/zerolog" 24 | "github.com/rs/zerolog/log" 25 | ) 26 | 27 | const ( 28 | LogFormatJsonValue = "json" 29 | LogFormatTextValue = "text" 30 | ) 31 | 32 | func SetLogLevel(logLevelStr string, logFormat string) error { 33 | 34 | var logLevel zerolog.Level 35 | switch logLevelStr { 36 | case zerolog.LevelDebugValue: 37 | logLevel = zerolog.DebugLevel 38 | case zerolog.LevelInfoValue: 39 | logLevel = zerolog.InfoLevel 40 | case zerolog.LevelWarnValue: 41 | logLevel = zerolog.WarnLevel 42 | default: 43 | return fmt.Errorf("unknown log level %s", logLevelStr) 44 | 45 | } 46 | 47 | var formatWriter io.Writer 48 | switch logFormat { 49 | case LogFormatJsonValue: 50 | formatWriter = os.Stderr 51 | case LogFormatTextValue: 52 | formatWriter = zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339} 53 | } 54 | 55 | if logLevelStr == zerolog.LevelDebugValue { 56 | log.Logger = zerolog.New(formatWriter). 57 | Level(logLevel). 58 | With(). 59 | Timestamp(). 60 | Caller(). 61 | Int("pid", os.Getpid()).Logger() 62 | } else { 63 | log.Logger = zerolog.New(formatWriter). 64 | Level(logLevel). 65 | With(). 66 | Timestamp(). 
67 | Logger() 68 | } 69 | return nil 70 | } 71 | -------------------------------------------------------------------------------- /internal/utils/pgerrors/wrapper.go: -------------------------------------------------------------------------------- 1 | package pgerrors 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/jackc/pgx/v5/pgproto3" 7 | ) 8 | 9 | type PgError struct { 10 | Err *pgproto3.ErrorResponse 11 | } 12 | 13 | func NewPgError(err *pgproto3.ErrorResponse) error { 14 | return &PgError{Err: err} 15 | } 16 | 17 | func (e *PgError) Error() string { 18 | return fmt.Sprintf("%s %s (code %s)", e.Err.Message, e.Err.Detail, e.Err.Code) 19 | } 20 | -------------------------------------------------------------------------------- /internal/utils/reader/reader.go: -------------------------------------------------------------------------------- 1 | package reader 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | ) 7 | 8 | func ReadLine(r *bufio.Reader, buf []byte) ([]byte, error) { 9 | buf = buf[:0] 10 | for { 11 | var line []byte 12 | line, isPrefix, err := r.ReadLine() 13 | if err != nil { 14 | return nil, fmt.Errorf("unable to read line: %w", err) 15 | } 16 | buf = append(buf, line...) 
17 | if !isPrefix { 18 | break 19 | } 20 | } 21 | return buf, nil 22 | } 23 | -------------------------------------------------------------------------------- /internal/utils/strings/strings.go: -------------------------------------------------------------------------------- 1 | package strings 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/mitchellh/go-wordwrap" 7 | ) 8 | 9 | func WrapString(v string, maxLength int) string { 10 | strs := strings.Split(wordwrap.WrapString(v, uint(maxLength)), "\n") 11 | res := make([]string, 0, len(strs)) 12 | for _, s := range strs { 13 | if len(s) > maxLength { 14 | 15 | for idx := 0; idx < len(s); idx += maxLength { 16 | rest := idx + maxLength 17 | if rest > len(s) { 18 | rest = idx + len(s) - idx 19 | } 20 | res = append(res, s[idx:rest]) 21 | } 22 | 23 | } else { 24 | res = append(res, s) 25 | } 26 | } 27 | return strings.Join(res, "\n") 28 | } 29 | -------------------------------------------------------------------------------- /internal/utils/strings/strings_test.go: -------------------------------------------------------------------------------- 1 | package strings 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestWrapString(t *testing.T) { 11 | original := "1234567890" 12 | maxLength := 7 13 | 14 | strs := strings.Split(WrapString(original, maxLength), "\n") 15 | require.Len(t, strs[0], 7) 16 | require.Len(t, strs[1], 3) 17 | } 18 | -------------------------------------------------------------------------------- /internal/utils/testutils/storage.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | "github.com/stretchr/testify/mock" 8 | 9 | "github.com/greenmaskio/greenmask/internal/storages" 10 | "github.com/greenmaskio/greenmask/internal/storages/domains" 11 | ) 12 | 13 | type StorageMock struct { 14 | mock.Mock 15 | } 16 | 17 | func (s *StorageMock) 
GetCwd() string { 18 | args := s.Called() 19 | return args.String(0) 20 | } 21 | 22 | func (s *StorageMock) Dirname() string { 23 | args := s.Called() 24 | return args.String(0) 25 | } 26 | 27 | func (s *StorageMock) ListDir(ctx context.Context) (files []string, dirs []storages.Storager, err error) { 28 | args := s.Called(ctx) 29 | return args.Get(0).([]string), args.Get(1).([]storages.Storager), args.Error(2) 30 | } 31 | 32 | func (s *StorageMock) GetObject(ctx context.Context, filePath string) (reader io.ReadCloser, err error) { 33 | args := s.Called(ctx, filePath) 34 | if args.Get(0) == nil { 35 | return nil, args.Error(1) 36 | } 37 | return args.Get(0).(io.ReadCloser), args.Error(1) 38 | } 39 | 40 | func (s *StorageMock) PutObject(ctx context.Context, filePath string, body io.Reader) error { 41 | args := s.Called(ctx, filePath, body) 42 | return args.Error(0) 43 | } 44 | 45 | func (s *StorageMock) Delete(ctx context.Context, filePaths ...string) error { 46 | args := s.Called(ctx, filePaths) 47 | return args.Error(0) 48 | } 49 | 50 | func (s *StorageMock) DeleteAll(ctx context.Context, pathPrefix string) error { 51 | args := s.Called(ctx, pathPrefix) 52 | return args.Error(0) 53 | } 54 | 55 | func (s *StorageMock) Exists(ctx context.Context, fileName string) (bool, error) { 56 | args := s.Called(ctx, fileName) 57 | return args.Bool(0), args.Error(1) 58 | } 59 | 60 | func (s *StorageMock) SubStorage(subPath string, relative bool) storages.Storager { 61 | args := s.Called(subPath, relative) 62 | return args.Get(0).(storages.Storager) 63 | } 64 | 65 | func (s *StorageMock) Stat(fileName string) (*domains.ObjectStat, error) { 66 | args := s.Called(fileName) 67 | return args.Get(0).(*domains.ObjectStat), args.Error(1) 68 | } 69 | -------------------------------------------------------------------------------- /pkg/toolkit/expt_test.go: -------------------------------------------------------------------------------- 1 | package toolkit 2 | 3 | import ( 4 | "testing" 5 
| 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestWhenCond_Evaluate(t *testing.T) { 10 | driver := getDriver() 11 | record := NewRecord(driver) 12 | row := newTestRowDriver([]string{"1", "2023-08-27 00:00:00.000000", testNullSeq, `{"a": 1}`, "123.0"}) 13 | record.SetRow(row) 14 | 15 | type test struct { 16 | name string 17 | when string 18 | expected bool 19 | } 20 | tests := []test{ 21 | { 22 | name: "int value equal", 23 | when: "record.id == 1", 24 | expected: true, 25 | }, 26 | { 27 | name: "raw int value equal", 28 | when: "raw_record.id == \"1\"", 29 | expected: true, 30 | }, 31 | { 32 | name: "is null value check", 33 | when: "record.title == null", 34 | expected: true, 35 | }, 36 | { 37 | name: "test date cmp", 38 | when: "record.created_at > now()", 39 | expected: false, 40 | }, 41 | { 42 | name: "test json cmp and sping func", 43 | when: `raw_record.json_data | jsonGet("a") == 1`, 44 | expected: false, 45 | }, 46 | { 47 | name: "check has array func", 48 | when: `record.id | has([1, 2, 3, 9223372036854775807])`, 49 | expected: true, 50 | }, 51 | { 52 | name: "float cmp", 53 | when: `record.float_data | has([123.0, 1., 10.])`, 54 | expected: true, 55 | }, 56 | } 57 | 58 | for _, tt := range tests { 59 | t.Run(tt.name, func(t *testing.T) { 60 | whenCond, warns := NewWhenCond(tt.when, driver, make(map[string]any)) 61 | require.Empty(t, warns) 62 | res, err := whenCond.Evaluate(record) 63 | require.NoError(t, err) 64 | require.Equal(t, tt.expected, res) 65 | }) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /pkg/toolkit/meta.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// Parameterizer is the common read-side interface of a parameter: it exposes
// the parameter value in several forms together with its definition and
// state flags.
type Parameterizer interface {
	// Value returns the parameter value as any.
	// NOTE(review): presumably the decoded (typed) value - confirm against
	// the implementations.
	Value() (value any, err error)
	// RawValue returns the parameter value as ParamsValue.
	RawValue() (rawValue ParamsValue, err error)
	// Scan stores the parameter value into dest.
	// NOTE(review): dest is presumably a pointer - confirm against the
	// implementations.
	Scan(dest any) (err error)
	// GetDefinition returns the ParameterDefinition of this parameter.
	GetDefinition() *ParameterDefinition
	// IsDynamic reports whether the parameter is dynamic.
	IsDynamic() bool
	// IsEmpty reports whether the parameter has no value; it may fail with
	// an error.
	IsEmpty() (bool, error)
}
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | import ( 18 | "encoding/json" 19 | "fmt" 20 | ) 21 | 22 | type RawRecord map[int]*RawValue 23 | 24 | func (rr *RawRecord) GetColumn(idx int) (*RawValue, error) { 25 | res, ok := (*rr)[idx] 26 | if !ok { 27 | return nil, fmt.Errorf("column with idx=%d is not found", idx) 28 | } 29 | return res, nil 30 | } 31 | 32 | func (rr *RawRecord) SetColumn(idx int, v *RawValue) error { 33 | (*rr)[idx] = v 34 | return nil 35 | } 36 | 37 | func (rr *RawRecord) Encode() ([]byte, error) { 38 | res, err := json.Marshal(rr) 39 | if err != nil { 40 | return nil, fmt.Errorf("error encoding: %w", err) 41 | } 42 | return res, nil 43 | } 44 | 45 | func (rr *RawRecord) Decode(data []byte) error { 46 | *rr = make(map[int]*RawValue, len(*rr)) 47 | return json.Unmarshal(data, rr) 48 | } 49 | 50 | func (rr *RawRecord) Length() int { 51 | return len(*rr) 52 | } 53 | 54 | func (rr *RawRecord) Clean() { 55 | for key := range *rr { 56 | delete(*rr, key) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /pkg/toolkit/raw_record_str.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | import ( 18 | "encoding/json" 19 | "fmt" 20 | ) 21 | 22 | // RawRecordStr - Record data transfer object for interaction with custom transformer via PIPE 23 | type RawRecordStr map[int]*RawValueStr 24 | 25 | func (rrs *RawRecordStr) GetColumn(idx int) (*RawValue, error) { 26 | res, ok := (*rrs)[idx] 27 | if !ok { 28 | return nil, fmt.Errorf("column with idx=%d is not found", idx) 29 | } 30 | var data []byte 31 | if res.Data != nil { 32 | data = []byte(*res.Data) 33 | } 34 | return NewRawValue(data, res.IsNull), nil 35 | } 36 | 37 | func (rrs *RawRecordStr) SetColumn(idx int, v *RawValue) error { 38 | (*rrs)[idx] = NewRawValueStr(v.Data, v.IsNull) 39 | return nil 40 | } 41 | 42 | func (rrs *RawRecordStr) Encode() ([]byte, error) { 43 | res, err := json.Marshal(rrs) 44 | if err != nil { 45 | return nil, fmt.Errorf("error encoding: %w", err) 46 | } 47 | return res, nil 48 | } 49 | 50 | func (rrs *RawRecordStr) Decode(data []byte) error { 51 | *rrs = make(map[int]*RawValueStr, len(*rrs)) 52 | return json.Unmarshal(data, rrs) 53 | } 54 | 55 | func (rrs *RawRecordStr) Length() int { 56 | return len(*rrs) 57 | } 58 | 59 | func (rrs *RawRecordStr) Clean() { 60 | for key := range *rrs { 61 | delete(*rrs, key) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pkg/toolkit/raw_record_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache 
License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | import ( 18 | "encoding/json" 19 | "testing" 20 | 21 | "github.com/stretchr/testify/require" 22 | ) 23 | 24 | func TestRawRecordDto(t *testing.T) { 25 | rawData := []byte(`{"8":{"d":"","n":true},"9":{"d":"","n":true}}`) 26 | expected := []byte(`{"8":{"d":"test","n":false},"9":{"d":"","n":true}}`) 27 | rrd := &RawRecordStr{} 28 | err := json.Unmarshal(rawData, rrd) 29 | require.NoError(t, err) 30 | 31 | err = rrd.SetColumn(8, NewRawValue([]byte("test"), false)) 32 | require.NoError(t, err) 33 | _, err = rrd.GetColumn(10) 34 | require.Error(t, err) 35 | 36 | res, err := json.Marshal(rrd) 37 | require.NoError(t, err) 38 | require.JSONEq(t, string(expected), string(res)) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/toolkit/raw_record_text.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | var DefaultNullSeq RawRecordText = []byte("\\N") 18 | 19 | type RawRecordText []byte 20 | 21 | func NewRawRecordText() *RawRecordText { 22 | return new(RawRecordText) 23 | } 24 | 25 | func (r *RawRecordText) GetColumn(idx int) (*RawValue, error) { 26 | if r == &DefaultNullSeq { 27 | return NewRawValue(nil, true), nil 28 | } 29 | return NewRawValue(*r, false), nil 30 | } 31 | 32 | func (r *RawRecordText) SetColumn(idx int, v *RawValue) error { 33 | if v.IsNull { 34 | *r = DefaultNullSeq 35 | return nil 36 | } 37 | *r = v.Data 38 | return nil 39 | } 40 | 41 | func (r *RawRecordText) Encode() ([]byte, error) { 42 | return *r, nil 43 | } 44 | 45 | func (r *RawRecordText) Decode(data []byte) error { 46 | *r = data 47 | return nil 48 | } 49 | 50 | func (r *RawRecordText) Length() int { 51 | return 1 52 | } 53 | 54 | func (r *RawRecordText) Clean() { 55 | *r = (*r)[:0] 56 | } 57 | -------------------------------------------------------------------------------- /pkg/toolkit/row_driver.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | // RowDriver - represents methods for interacts with any transferring format 18 | // It might be COPY, CSV, JSON, etc. 19 | // See implementation pgcopy.Row 20 | // RowDriver must keep the current row state 21 | type RowDriver interface { 22 | // GetColumn - get raw []byte value by column idx 23 | GetColumn(idx int) (*RawValue, error) 24 | // SetColumn - set RawValue value by column idx to the current row 25 | SetColumn(idx int, v *RawValue) error 26 | // Encode - encode the whole row to the []byte representation of RowDriver. It would be CSV 27 | // line or JSON object, etc. 
28 | Encode() ([]byte, error) 29 | // Decode - decode []bytes to RowDriver instance 30 | Decode([]byte) error 31 | // Length - count of attributes in the row 32 | Length() int 33 | // Clean - clean the state 34 | Clean() 35 | } 36 | -------------------------------------------------------------------------------- /pkg/toolkit/static_parameter_context.go: -------------------------------------------------------------------------------- 1 | package toolkit 2 | 3 | import "fmt" 4 | 5 | type StaticParameterContext struct { 6 | rc *RecordContext 7 | linkedColumnName string 8 | } 9 | 10 | func NewStaticParameterContext(d *Driver, linkedColumnName string) *StaticParameterContext { 11 | dummyRecord := NewRecord(d) 12 | rc := NewRecordContext() 13 | rc.SetRecord(dummyRecord) 14 | return &StaticParameterContext{ 15 | rc: rc, 16 | linkedColumnName: linkedColumnName, 17 | } 18 | } 19 | 20 | func (spc *StaticParameterContext) GetColumnType(name string) (string, error) { 21 | return spc.rc.GetColumnType(name) 22 | } 23 | 24 | func (spc *StaticParameterContext) EncodeValueByColumn(name string, v any) (any, error) { 25 | return spc.rc.EncodeValueByColumn(name, v) 26 | } 27 | 28 | func (spc *StaticParameterContext) DecodeValueByColumn(name string, v any) (any, error) { 29 | return spc.rc.DecodeValueByColumn(name, v) 30 | } 31 | 32 | func (spc *StaticParameterContext) EncodeValueByType(name string, v any) (any, error) { 33 | return spc.rc.EncodeValueByType(name, v) 34 | } 35 | 36 | func (spc *StaticParameterContext) DecodeValueByType(name string, v any) (any, error) { 37 | return spc.rc.DecodeValueByType(name, v) 38 | } 39 | 40 | func (spc *StaticParameterContext) EncodeValue(v any) (any, error) { 41 | if spc.linkedColumnName == "" { 42 | return nil, fmt.Errorf("linked column name is not set use .EncodeValueByType or .EncodeValueByColumn instead") 43 | } 44 | return spc.rc.EncodeValueByColumn(spc.linkedColumnName, v) 45 | } 46 | 47 | func (spc *StaticParameterContext) DecodeValue(v 
any) (any, error) { 48 | if spc.linkedColumnName == "" { 49 | return nil, fmt.Errorf("linked column name is not set use .DecodeValueByType or .DecodeValueByColumn instead") 50 | } 51 | return spc.rc.DecodeValueByColumn(spc.linkedColumnName, v) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/toolkit/static_parameter_test.go: -------------------------------------------------------------------------------- 1 | package toolkit 2 | -------------------------------------------------------------------------------- /pkg/toolkit/table.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package toolkit 16 | 17 | import "errors" 18 | 19 | type Reference struct { 20 | Idx int 21 | Schema string 22 | Name string 23 | // ReferencedKeys - list of foreign keys of current table 24 | ReferencedKeys []string 25 | IsNullable bool 26 | } 27 | 28 | type Table struct { 29 | Schema string `json:"schema"` 30 | Name string `json:"name"` 31 | Oid Oid `json:"oid"` 32 | Columns []*Column `json:"columns"` 33 | Kind string `json:"kind"` 34 | Parent Oid `json:"parent"` 35 | Children []Oid `json:"children"` 36 | Size int64 `json:"size"` 37 | PrimaryKey []string `json:"primary_key"` 38 | // RootPtSchema, RootPtName, RootPtOid - the first parent of the partitioned table 39 | RootPtSchema string `json:"root_pt_schema"` 40 | RootPtName string `json:"root_pt_name"` 41 | RootPtOid Oid `json:"root_pt_oid"` 42 | Constraints []Constraint `json:"-"` 43 | } 44 | 45 | func (t *Table) Validate() error { 46 | if t.Schema == "" { 47 | return errors.New("empty table schema") 48 | } 49 | if t.Name == "" { 50 | return errors.New("empty table name") 51 | } 52 | if t.Oid == 0 { 53 | return errors.New("empty table oid") 54 | } 55 | if len(t.Columns) == 0 { 56 | return errors.New("empty table columns") 57 | } 58 | 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /pkg/toolkit/testutils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | var testNullSeq = "\\N" 18 | var testDelim byte = '\t' 19 | 20 | type TestRowDriver struct { 21 | row []string 22 | } 23 | 24 | func newTestRowDriver(row []string) *TestRowDriver { 25 | return &TestRowDriver{row: row} 26 | } 27 | 28 | func (trd *TestRowDriver) GetColumn(idx int) (*RawValue, error) { 29 | val := trd.row[idx] 30 | if val == testNullSeq { 31 | return NewRawValue(nil, true), nil 32 | } 33 | return NewRawValue([]byte(val), false), nil 34 | } 35 | 36 | func (trd *TestRowDriver) SetColumn(idx int, v *RawValue) error { 37 | if v.IsNull { 38 | trd.row[idx] = testNullSeq 39 | } else { 40 | trd.row[idx] = string(v.Data) 41 | } 42 | return nil 43 | } 44 | 45 | func (trd *TestRowDriver) Encode() ([]byte, error) { 46 | var res []byte 47 | for idx, v := range trd.row { 48 | res = append(res, []byte(v)...) 49 | if idx != len(trd.row)-1 { 50 | res = append(res, testDelim) 51 | } 52 | } 53 | return res, nil 54 | } 55 | 56 | func (trd *TestRowDriver) Decode([]byte) error { 57 | panic("is not implemented") 58 | } 59 | 60 | func (trd *TestRowDriver) Length() int { 61 | return len(trd.row) 62 | } 63 | 64 | func (trd *TestRowDriver) Clean() { 65 | 66 | } 67 | -------------------------------------------------------------------------------- /pkg/toolkit/testutils/testutils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package testutils 16 | 17 | import "github.com/greenmaskio/greenmask/pkg/toolkit" 18 | 19 | var NullSeq = "\\N" 20 | var Delim byte = '\t' 21 | 22 | type TestRowDriver struct { 23 | row []string 24 | } 25 | 26 | func NewTestRowDriver(row []string) *TestRowDriver { 27 | return &TestRowDriver{row: row} 28 | } 29 | 30 | func (trd *TestRowDriver) GetColumn(idx int) (*toolkit.RawValue, error) { 31 | val := trd.row[idx] 32 | if val == NullSeq { 33 | return toolkit.NewRawValue(nil, true), nil 34 | } 35 | return toolkit.NewRawValue([]byte(val), false), nil 36 | } 37 | 38 | func (trd *TestRowDriver) SetColumn(idx int, v *toolkit.RawValue) error { 39 | if v.IsNull { 40 | trd.row[idx] = NullSeq 41 | } else { 42 | trd.row[idx] = string(v.Data) 43 | } 44 | return nil 45 | } 46 | 47 | func (trd *TestRowDriver) Encode() ([]byte, error) { 48 | var res []byte 49 | for idx, v := range trd.row { 50 | res = append(res, []byte(v)...) 
51 | if idx != len(trd.row)-1 { 52 | res = append(res, Delim) 53 | } 54 | } 55 | return res, nil 56 | } 57 | 58 | func (trd *TestRowDriver) Decode([]byte) error { 59 | panic("is not implemented") 60 | } 61 | 62 | func (trd *TestRowDriver) Length() int { 63 | return len(trd.row) 64 | } 65 | 66 | func (trd *TestRowDriver) Clean() { 67 | 68 | } 69 | -------------------------------------------------------------------------------- /pkg/toolkit/transformer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package toolkit 16 | 17 | import ( 18 | "context" 19 | ) 20 | 21 | type NewTransformerFunc func(ctx context.Context, driver *Driver, parameters map[string]Parameterizer) ( 22 | Transformer, ValidationWarnings, error, 23 | ) 24 | 25 | type Transformer interface { 26 | Validate(ctx context.Context) (ValidationWarnings, error) 27 | Transform(ctx context.Context, r *Record) error 28 | } 29 | -------------------------------------------------------------------------------- /pkg/toolkit/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// ScanPointer copies the value that src points to into the variable that dest
// points to.
//
// Both arguments must be non-nil pointers, and the value behind src must be
// assignable to the type behind dest. An error is returned when:
//   - src and dest are of different kinds (for example a pointer and a
//     non-pointer);
//   - the pointed-to values are of different kinds;
//   - the destination cannot be set (non-pointer or nil arguments);
//   - the kinds match but the types are not assignable, such as two distinct
//     struct types. Previously this case panicked inside reflect.Value.Set.
func ScanPointer(src, dest any) error {
	srcValue := reflect.ValueOf(src)
	destValue := reflect.ValueOf(dest)
	if srcValue.Kind() != destValue.Kind() {
		return errors.New("src must be pointer")
	}

	// Indirect dereferences a pointer, returns a non-pointer unchanged and
	// yields the zero Value for a nil pointer.
	srcInd := reflect.Indirect(srcValue)
	destInd := reflect.Indirect(destValue)
	if srcInd.Kind() != destInd.Kind() {
		return errors.New("unexpected src type")
	}

	// Only a value reached through a non-nil pointer is settable. This also
	// rejects zero Values, so calling Type below is safe.
	if !destInd.CanSet() {
		return errors.New("unable to set the value")
	}

	// Equal kinds do not imply assignable types; Set would panic on mismatch.
	if !srcInd.Type().AssignableTo(destInd.Type()) {
		return errors.New("src type is not assignable to dest type")
	}

	destInd.Set(srcInd)
	return nil
}
// RawValue is a raw byte payload paired with a NULL flag. It is serialized to
// JSON under the keys "d" (data) and "n" (null flag).
type RawValue struct {
	Data   []byte `json:"d"`
	IsNull bool   `json:"n"`
}

// NewRawValue returns a RawValue holding data (not copied) and isNull.
func NewRawValue(data []byte, isNull bool) *RawValue {
	rv := new(RawValue)
	rv.Data, rv.IsNull = data, isNull
	return rv
}

// Value is an arbitrary value paired with a NULL flag.
type Value struct {
	Value  any
	IsNull bool
}

// NewValue returns a Value holding v and isNull.
func NewValue(v any, isNull bool) *Value {
	val := new(Value)
	val.Value, val.IsNull = v, isNull
	return val
}

// RawValueStr is the string-based counterpart of RawValue. It is serialized
// to JSON under the same "d" and "n" keys.
type RawValueStr struct {
	Data   *string `json:"d"`
	IsNull bool    `json:"n"`
}

// NewRawValueStr converts data to a string and returns a RawValueStr pointing
// at that copy. Data is never nil, even when data is nil or isNull is true.
func NewRawValueStr(data []byte, isNull bool) *RawValueStr {
	text := string(data)
	out := &RawValueStr{IsNull: isNull}
	out.Data = &text
	return out
}
"zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" 13 | 14 | validate: 15 | # resolved_warnings: 16 | # - "aa808fb574a1359c6606e464833feceb" 17 | 18 | dump: 19 | pg_dump_options: # pg_dump option that will be provided 20 | dbname: "host=playground-db user=postgres password=example dbname=original" 21 | jobs: 10 22 | 23 | transformation: # List of tables to transform 24 | - schema: "humanresources" # Table schema 25 | name: "employee" # Table name 26 | transformers: # List of transformers to apply 27 | - name: "NoiseDate" # name of transformers 28 | params: # Transformer parameters 29 | max_ratio: "10 year 9 mon 1 day" 30 | column: "birthdate" # Column parameter - this transformer affects scheduled_departure column 31 | 32 | restore: 33 | pg_restore_options: # pg_restore option (you can use the same options as pg_restore has) 34 | jobs: 10 35 | dbname: "host=playground-db user=postgres password=example dbname=transformed" 36 | -------------------------------------------------------------------------------- /playground/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "alias psql='psql -U postgres -d $ORIGINAL_DB_NAME -h playground-db'" >> ~/.bashrc 3 | echo "alias psql_o='psql -U postgres -d $ORIGINAL_DB_NAME -h playground-db'" >> ~/.bashrc 4 | echo "alias psql_t='psql -U postgres -d $TRANSFORMED_DB_NAME -h playground-db'" >> ~/.bashrc 5 | echo "alias cleanup='/var/lib/playground/cleanup.sh'" >> ~/.bashrc 6 | bash 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | babel==2.16.0 2 | beautifulsoup4==4.12.3 3 | cairocffi==1.7.1 4 | CairoSVG==2.7.1 5 | certifi==2024.8.30 6 | cffi==1.17.1 7 | charset-normalizer==3.4.0 8 | click==8.1.7 9 | colorama==0.4.6 10 | cssselect2==0.7.0 11 | defusedxml==0.7.1 12 | ghp-import==2.1.0 13 | gitdb==4.0.11 14 | GitPython==3.1.43 15 | hjson==3.1.0 
16 | idna==3.10 17 | importlib_metadata==8.5.0 18 | importlib_resources==6.4.5 19 | Jinja2==3.1.4 20 | lxml==5.3.0 21 | Markdown==3.7 22 | MarkupSafe==3.0.1 23 | mergedeep==1.3.4 24 | mike==2.1.3 25 | mkdocs==1.6.1 26 | mkdocs-get-deps==0.2.0 27 | mkdocs-git-authors-plugin==0.9.0 28 | mkdocs-git-committers-plugin-2==2.4.1 29 | mkdocs-git-revision-date-localized-plugin==1.2.9 30 | mkdocs-macros-plugin==1.3.5 31 | mkdocs-material==9.5.40 32 | mkdocs-material-extensions==1.3.1 33 | packaging==24.1 34 | paginate==0.5.7 35 | pathspec==0.12.1 36 | pillow==10.4.0 37 | platformdirs==4.3.6 38 | pycparser==2.22 39 | Pygments==2.18.0 40 | pymdown-extensions==10.11.2 41 | pyparsing==3.2.0 42 | python-dateutil==2.9.0.post0 43 | pytz==2024.2 44 | PyYAML==6.0.2 45 | pyyaml_env_tag==0.1 46 | regex==2024.9.11 47 | requests==2.32.3 48 | six==1.16.0 49 | smmap==5.0.1 50 | soupsieve==2.6 51 | super_collections==0.5.3 52 | termcolor==2.5.0 53 | tinycss2==1.3.0 54 | urllib3==2.2.3 55 | verspec==0.1.0 56 | watchdog==5.0.3 57 | webencodings==0.5.1 58 | zipp==3.20.2 59 | -------------------------------------------------------------------------------- /tests/debug_utils/toc/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package main 16 | 17 | import ( 18 | "os" 19 | "path" 20 | 21 | "github.com/rs/zerolog/log" 22 | 23 | toclib "github.com/greenmaskio/greenmask/internal/db/postgres/toc" 24 | ) 25 | 26 | func main() { 27 | dirPath := os.Args[1] 28 | src, err := os.Open(path.Join(dirPath, "toc.dat")) 29 | if err != nil { 30 | log.Fatal().Err(err).Msg("error") 31 | } 32 | defer src.Close() 33 | dest, err := os.Create(path.Join(dirPath, "new_toc2.dat")) 34 | if err != nil { 35 | log.Fatal().Err(err).Msg("error") 36 | } 37 | defer dest.Close() 38 | 39 | reader := toclib.NewReader(src) 40 | toc, err := reader.Read() 41 | if err != nil { 42 | log.Fatal().Err(err).Msgf("err") 43 | } 44 | 45 | for _, item := range toc.Entries { 46 | if item.Section == toclib.SectionData { 47 | log.Printf("%+v\n", item) 48 | } 49 | } 50 | 51 | writer := toclib.NewWriter(dest) 52 | if err := writer.Write(toc); err != nil { 53 | log.Fatal().Err(err).Msgf("err") 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /tests/integration/greenmask/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package greenmask 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/suite" 21 | ) 22 | 23 | func TestTocLibrary(t *testing.T) { 24 | suite.Run(t, new(TocReadWriterSuite)) 25 | } 26 | 27 | func TestGreenmaskBackwardCompatibility(t *testing.T) { 28 | suite.Run(t, new(BackwardCompatibilitySuite)) 29 | } 30 | -------------------------------------------------------------------------------- /tests/integration/storages/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Greenmask 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package storages 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/suite" 21 | ) 22 | 23 | func TestS3Storage(t *testing.T) { 24 | suite.Run(t, new(S3StorageSuite)) 25 | } 26 | --------------------------------------------------------------------------------