├── website ├── static │ ├── .nojekyll │ └── img │ │ ├── favicon.ico │ │ ├── datastore │ │ ├── aws │ │ │ ├── 01.png │ │ │ ├── 02.png │ │ │ └── 03.png │ │ └── gcp │ │ │ └── 01.jpg │ │ ├── tutorial │ │ ├── localeDropdown.png │ │ └── docsVersionDropdown.png │ │ └── replibyte_dump_and_restore.jpg ├── docs │ ├── design │ │ ├── _category_.json │ │ └── how-database-subset-works.md │ ├── guides │ │ ├── _category_.json │ │ ├── deploy-replibyte │ │ │ ├── _category_.json │ │ │ ├── qovery.md │ │ │ └── container.md │ │ ├── 4-delete-a-dump.md │ │ ├── 3-subset-a-dump.md │ │ └── 2-restore-a-dump.md │ ├── advanced-guides │ │ ├── _category_.json │ │ └── web-assembly-transformer.md │ ├── getting-started │ │ ├── _category_.json │ │ ├── concepts.md │ │ ├── installation.mdx │ │ └── configuration.md │ ├── contributing.md │ ├── faq.md │ ├── databases.mdx │ ├── how-replibyte-works.md │ ├── introduction.mdx │ └── datastores.mdx ├── babel.config.js ├── tsconfig.json ├── src │ ├── components │ │ └── HomepageFeatures │ │ │ ├── styles.module.css │ │ │ └── index.tsx │ ├── pages │ │ ├── index.module.css │ │ └── index.tsx │ └── css │ │ └── custom.css ├── .gitignore ├── sidebars.js ├── README.md ├── package.json └── docusaurus.config.js ├── docker └── exec.sh ├── .dockerignore ├── assets ├── video_.png └── RepliByte Logo.png ├── replibyte ├── src │ ├── commands │ │ ├── mod.rs │ │ ├── transformer.rs │ │ └── source.rs │ ├── connector.rs │ ├── destination │ │ ├── mod.rs │ │ ├── generic_stdout.rs │ │ ├── mysql.rs │ │ ├── mysql_docker.rs │ │ ├── mongodb.rs │ │ ├── postgres_docker.rs │ │ ├── postgres.rs │ │ ├── docker.rs │ │ └── mongodb_docker.rs │ ├── tasks │ │ ├── mod.rs │ │ ├── full_restore.rs │ │ └── full_dump.rs │ ├── runtime.rs │ ├── source │ │ ├── mysql_stdin.rs │ │ ├── mod.rs │ │ ├── mongodb_stdin.rs │ │ └── postgres_stdin.rs │ ├── transformer │ │ ├── transient.rs │ │ ├── random.rs │ │ ├── phone_number.rs │ │ ├── credit_card.rs │ │ ├── mod.rs │ │ ├── email.rs │ │ ├── first_name.rs │ │ ├── 
keep_first_char.rs │ │ └── redacted.rs │ ├── types.rs │ ├── migration │ │ ├── update_version_number.rs │ │ └── rename_backups_to_dumps.rs │ ├── utils.rs │ └── cli.rs └── Cargo.toml ├── Cargo.toml ├── db ├── postgres │ └── 01-init.sql └── mongodb │ └── init-mongo.js ├── examples ├── wasm │ ├── wasm-transformer-reverse-string.wasm │ ├── README.md │ └── replibyte.yaml ├── destination-postgres.yaml ├── source-postgres-with-no-transformers.yaml ├── source-postgres-with-env-vars.yaml ├── with-local-disk-datastore.yaml ├── source-postgres-with-gcp-datastore.yaml ├── source-postgres.yaml ├── source-mysql-bridge-minio.yaml ├── source-and-destination-postgres.yaml ├── source-and-dest-mongodb-bridge-minio.yaml ├── with-transformer-options.yaml ├── with-encryption.yaml ├── source-postgres-bridge-minio.yaml ├── replibyte.yaml ├── with-skip.yaml └── with-subset-and-transformer.yaml ├── subset ├── README.md ├── src │ ├── utils.rs │ ├── dedup.rs │ └── lib.rs └── Cargo.toml ├── dump-parser ├── src │ ├── errors.rs │ └── lib.rs ├── Cargo.toml └── README.md ├── .github └── workflows │ ├── on-tag.yml │ ├── website.yml │ ├── publish-image.yaml │ ├── build-and-test.yml │ └── on-release.yml ├── docker-compose-postgres.yml ├── docker-compose-mongodb.yml ├── docker-compose-mysql.yml ├── docker-compose-postgres-minio.yml ├── docker-compose-mongodb-minio.yml ├── docker-compose-mysql-minio.yml ├── Dockerfile ├── .gitignore ├── release.sh ├── docs └── DESIGN.md ├── docker-compose-dev.yml └── README.md /website/static/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker/exec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | eval "./replibyte $@" 3 | -------------------------------------------------------------------------------- /.dockerignore: 
-------------------------------------------------------------------------------- 1 | target 2 | .git 3 | .gitignore 4 | .iml 5 | .md 6 | -------------------------------------------------------------------------------- /assets/video_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/assets/video_.png -------------------------------------------------------------------------------- /replibyte/src/commands/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod dump; 2 | pub mod source; 3 | pub mod transformer; 4 | -------------------------------------------------------------------------------- /website/docs/design/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Design", 3 | "position": 10 4 | } 5 | -------------------------------------------------------------------------------- /website/docs/guides/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Guides", 3 | "position": 8 4 | } 5 | -------------------------------------------------------------------------------- /assets/RepliByte Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/assets/RepliByte Logo.png -------------------------------------------------------------------------------- /website/docs/advanced-guides/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Advanced Guides", 3 | "position": 9 4 | } 5 | -------------------------------------------------------------------------------- /website/docs/getting-started/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Getting Started", 3 | "position": 3 4 | } 5 | 
-------------------------------------------------------------------------------- /website/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/favicon.ico -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | exclude = ["db/*", "assets/*"] 3 | members = ["dump-parser", "replibyte", "subset"] 4 | -------------------------------------------------------------------------------- /db/postgres/01-init.sql: -------------------------------------------------------------------------------- 1 | -- CREATE USER postgres SUPERUSER; 2 | -- CREATE DATABASE postgres WITH OWNER postgres; 3 | -------------------------------------------------------------------------------- /website/docs/guides/deploy-replibyte/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Deploy Replibyte", 3 | "position": 6 4 | } 5 | -------------------------------------------------------------------------------- /website/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /website/static/img/datastore/aws/01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/aws/01.png -------------------------------------------------------------------------------- /website/static/img/datastore/aws/02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/aws/02.png -------------------------------------------------------------------------------- /website/static/img/datastore/aws/03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/aws/03.png -------------------------------------------------------------------------------- /website/static/img/datastore/gcp/01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/gcp/01.jpg -------------------------------------------------------------------------------- /replibyte/src/connector.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | 3 | pub trait Connector { 4 | fn init(&mut self) -> Result<(), Error>; 5 | } 6 | -------------------------------------------------------------------------------- /website/static/img/tutorial/localeDropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/tutorial/localeDropdown.png -------------------------------------------------------------------------------- /examples/wasm/wasm-transformer-reverse-string.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/examples/wasm/wasm-transformer-reverse-string.wasm -------------------------------------------------------------------------------- /website/static/img/replibyte_dump_and_restore.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/replibyte_dump_and_restore.jpg 
-------------------------------------------------------------------------------- /website/static/img/tutorial/docsVersionDropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/tutorial/docsVersionDropdown.png -------------------------------------------------------------------------------- /subset/README.md: -------------------------------------------------------------------------------- 1 | # Subset 2 | 3 | Subset is a Rust crate to scale down a database to a more reasonable size. So it can be used in staging, test and development environments. 4 | -------------------------------------------------------------------------------- /examples/wasm/README.md: -------------------------------------------------------------------------------- 1 | # RepliByte with a custom WebAssembly transformer 2 | 3 | Check out the [official guide here](https://www.replibyte.com/docs/advanced-guides/web-assembly-transformer) 4 | -------------------------------------------------------------------------------- /subset/src/utils.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | 3 | pub fn epoch_millis() -> u128 { 4 | SystemTime::now() 5 | .duration_since(UNIX_EPOCH) 6 | .unwrap() 7 | .as_millis() 8 | } 9 | -------------------------------------------------------------------------------- /website/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | // This file is not used in compilation. It is here just for a nice editor experience. 3 | "extends": "@tsconfig/docusaurus/tsconfig.json", 4 | "compilerOptions": { 5 | "baseUrl": "." 
6 | } 7 | } 8 | -------------------------------------------------------------------------------- /website/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | .features { 2 | display: flex; 3 | align-items: center; 4 | padding: 2rem 0; 5 | width: 100%; 6 | } 7 | 8 | .featureSvg { 9 | height: 200px; 10 | width: 200px; 11 | } 12 | -------------------------------------------------------------------------------- /subset/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "subset" 3 | version = "0.10.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | dump-parser = { path = "../dump-parser" } 10 | tempfile = "3.3" 11 | md5 = "0.7" 12 | -------------------------------------------------------------------------------- /examples/destination-postgres.yaml: -------------------------------------------------------------------------------- 1 | destination: 2 | connection_uri: postgres://root:password@localhost:5453/root 3 | datastore: 4 | aws: 5 | bucket: replibyte-test 6 | region: us-east-2 7 | credentials: 8 | access_key_id: $AWS_ACCESS_KEY_ID 9 | secret_access_key: $AWS_SECRET_ACCESS_KEY 10 | -------------------------------------------------------------------------------- /examples/source-postgres-with-no-transformers.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | datastore: 4 | aws: 5 | bucket: replibyte-test 6 | region: us-east-2 7 | credentials: 8 | access_key_id: $AWS_ACCESS_KEY_ID 9 | secret_access_key: $AWS_SECRET_ACCESS_KEY 10 | -------------------------------------------------------------------------------- /website/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /website/docs/guides/deploy-replibyte/qovery.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Qovery 3 | sidebar_position: 2 4 | --- 5 | 6 | # Deploy Replibyte with Qovery 7 | 8 | To use Replibyte with Qovery, refer to the [Qovery documentation - Lifecycle Job](https://hub.qovery.com/docs/using-qovery/configuration/lifecycle-job/) and this [example](https://github.com/Qovery/lifecycle-job-examples/tree/main/examples/seed-database-with-replibyte). 9 | -------------------------------------------------------------------------------- /replibyte/src/destination/mod.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | 3 | use crate::connector::Connector; 4 | use crate::types::Bytes; 5 | 6 | mod docker; 7 | pub mod generic_stdout; 8 | pub mod mongodb; 9 | pub mod mongodb_docker; 10 | pub mod mysql; 11 | pub mod mysql_docker; 12 | pub mod postgres; 13 | pub mod postgres_docker; 14 | 15 | pub trait Destination: Connector { 16 | fn write(&self, data: Bytes) -> Result<(), Error>; 17 | } 18 | -------------------------------------------------------------------------------- /replibyte/src/commands/transformer.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::transformers; 2 | use crate::utils::table; 3 | 4 | /// display all transformers available 5 | pub fn list() { 6 | let mut table = table(); 7 | 
table.set_titles(row!["name", "description"]); 8 | 9 | for transformer in transformers() { 10 | table.add_row(row![transformer.id(), transformer.description()]); 11 | } 12 | 13 | let _ = table.printstd(); 14 | } 15 | -------------------------------------------------------------------------------- /examples/source-postgres-with-env-vars.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: $DATABASE_URL 3 | transformers: 4 | - database: public 5 | table: employees 6 | columns: 7 | - name: last_name 8 | transformer_name: random 9 | datastore: 10 | aws: 11 | bucket: $BUCKET_NAME 12 | region: us-east-2 13 | credentials: 14 | access_key_id: $AWS_ACCESS_KEY_ID 15 | secret_access_key: $AWS_SECRET_ACCESS_KEY 16 | -------------------------------------------------------------------------------- /dump-parser/src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::io::ErrorKind; 2 | 3 | #[derive(Debug)] 4 | pub enum Error { 5 | DumpFile(DumpFileError), 6 | } 7 | 8 | #[derive(Debug)] 9 | pub enum DumpFileError { 10 | DoesNotExist, 11 | ReadError(std::io::Error), 12 | MalFormatted, 13 | } 14 | 15 | impl From for std::io::Error { 16 | fn from(err: DumpFileError) -> Self { 17 | std::io::Error::new(ErrorKind::Other, format!("{:?}", err)) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /replibyte/src/tasks/mod.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | 3 | pub mod full_dump; 4 | pub mod full_restore; 5 | 6 | pub type TransferredBytes = usize; 7 | pub type MaxBytes = usize; 8 | 9 | pub trait Task { 10 | fn run(self, progress_callback: F) -> Result<(), Error>; 11 | } 12 | 13 | /// inter-thread message for Source/Destination and Datastore 14 | #[derive(Debug, Clone)] 15 | enum Message { 16 | Data(T), 17 | EOF, 18 | } 19 | 
-------------------------------------------------------------------------------- /examples/with-local-disk-datastore.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | transformers: 4 | - database: public 5 | table: employees 6 | columns: 7 | - name: first_name 8 | transformer_name: first-name 9 | - name: last_name 10 | transformer_name: random 11 | datastore: 12 | local_disk: 13 | dir: ./my-datastore 14 | destination: 15 | connection_uri: postgres://root:password@localhost:5453/root 16 | -------------------------------------------------------------------------------- /website/src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /** 2 | * CSS files with the .module.css suffix will be treated as CSS modules 3 | * and scoped locally. 4 | */ 5 | 6 | .heroBanner { 7 | padding: 4rem 0; 8 | text-align: center; 9 | position: relative; 10 | overflow: hidden; 11 | } 12 | 13 | @media screen and (max-width: 996px) { 14 | .heroBanner { 15 | padding: 2rem; 16 | } 17 | } 18 | 19 | .buttons { 20 | display: flex; 21 | align-items: center; 22 | justify-content: center; 23 | } 24 | -------------------------------------------------------------------------------- /examples/source-postgres-with-gcp-datastore.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | transformers: 4 | - database: public 5 | table: employees 6 | columns: 7 | - name: fist_name 8 | transformer_name: first-name 9 | - name: last_name 10 | transformer_name: random 11 | datastore: 12 | gcp: 13 | bucket: replibyte-test 14 | region: us-west1 15 | access_key: $GS_ACCESS_KEY_ID 16 | secret: $GS_SECRET_ACCESS_KEY 17 | -------------------------------------------------------------------------------- /examples/source-postgres.yaml: 
-------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | transformers: 4 | - database: public 5 | table: employees 6 | columns: 7 | - name: fist_name 8 | transformer_name: first-name 9 | - name: last_name 10 | transformer_name: random 11 | datastore: 12 | aws: 13 | bucket: replibyte-test 14 | region: us-east-2 15 | credentials: 16 | access_key_id: $AWS_ACCESS_KEY_ID 17 | secret_access_key: $AWS_SECRET_ACCESS_KEY 18 | -------------------------------------------------------------------------------- /replibyte/src/runtime.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use std::future::Future; 3 | use std::sync::Mutex; 4 | use tokio::runtime::{Builder, Runtime}; 5 | 6 | lazy_static! { 7 | static ref TOKIO_RUNTIME: Mutex = Mutex::new({ 8 | Builder::new_current_thread() 9 | .thread_name("tokio-blocking") 10 | .enable_all() 11 | .build() 12 | .unwrap() 13 | }); 14 | } 15 | 16 | pub fn block_on(future: F) -> F::Output { 17 | TOKIO_RUNTIME.lock().unwrap().block_on(future) 18 | } 19 | -------------------------------------------------------------------------------- /.github/workflows/on-tag.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - "v*" 5 | 6 | jobs: 7 | release-tag: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@master 12 | - name: Create Release 13 | id: create_release 14 | uses: actions/create-release@v1 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.PERSONAL_TOKEN }} 17 | with: 18 | tag_name: ${{ github.ref }} 19 | release_name: Release ${{ github.ref }} 20 | draft: false 21 | prerelease: false 22 | -------------------------------------------------------------------------------- /website/docs/design/how-database-subset-works.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | # How database subset works 6 | 7 | This design doc explains how the database subset has been implemented on Replibyte. 8 | 9 | :::note 10 | 11 | Feel free to dig into the [Replibyte Subset](https://github.com/Qovery/Replibyte/tree/main/subset) source-code if you are even more curious. 12 | 13 | ::: 14 | 15 | ### Relations and virtual relations 16 | 17 | TODO 18 | 19 | ### Cyclic references 20 | 21 | TODO 22 | 23 | ### Subset Strategy 24 | 25 | TODO 26 | 27 | ### Performances 28 | 29 | TODO 30 | -------------------------------------------------------------------------------- /examples/source-mysql-bridge-minio.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: mysql://root:password@127.0.0.1:3306/world 3 | transformers: 4 | - database: world 5 | table: city 6 | columns: 7 | - name: Name 8 | transformer_name: random 9 | datastore: 10 | aws: 11 | bucket: replibyte-test 12 | region: us-east-2 13 | credentials: 14 | access_key_id: minioadmin 15 | secret_access_key: minioadmin 16 | endpoint: 17 | custom: 'http://localhost:9000' 18 | destination: 19 | # it's different to the source 20 | connection_uri: mysql://root:password@127.0.0.1:3307/world 21 | -------------------------------------------------------------------------------- /.github/workflows/website.yml: -------------------------------------------------------------------------------- 1 | name: github pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-18.04 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Setup Node 15 | uses: actions/setup-node@v1 16 | with: 17 | node-version: '17.x' 18 | 19 | - run: cd website && npm install && npm run-script build 20 | 21 | - name: Deploy 22 | uses: peaceiris/actions-gh-pages@v3 23 | with: 24 | github_token: ${{ 
secrets.GITHUB_TOKEN }} 25 | publish_dir: ./website/build 26 | cname: www.replibyte.com 27 | -------------------------------------------------------------------------------- /examples/source-and-destination-postgres.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | transformers: 4 | - database: public 5 | table: employees 6 | columns: 7 | - name: fist_name 8 | transformer_name: first-name 9 | - name: last_name 10 | transformer_name: random 11 | destination: 12 | # it's different to the source 13 | connection_uri: postgres://root:password@localhost:5453/root 14 | datastore: 15 | aws: 16 | bucket: replibyte-test 17 | region: us-east-2 18 | credentials: 19 | access_key_id: $AWS_ACCESS_KEY_ID 20 | secret_access_key: $AWS_SECRET_ACCESS_KEY 21 | -------------------------------------------------------------------------------- /examples/source-and-dest-mongodb-bridge-minio.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: mongodb://root:password@localhost:27017/?authSource=admin 3 | transformers: 4 | - database: test 5 | table: users 6 | columns: 7 | - name: name 8 | transformer_name: first-name 9 | - name: age 10 | transformer_name: random 11 | destination: 12 | connection_uri: mongodb://root:password@localhost:27018/?authSource=admin 13 | datastore: 14 | aws: 15 | bucket: replibyte-test 16 | region: us-east-2 17 | credentials: 18 | access_key_id: minioadmin 19 | secret_access_key: minioadmin 20 | endpoint: 21 | custom: 'http://localhost:9000' 22 | -------------------------------------------------------------------------------- /examples/wasm/replibyte.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: mongodb://root:password@localhost:27017/?authSource=admin 3 | transformers: 4 | - database: test 5 | table: users 6 | 
columns: 7 | - name: name 8 | transformer_name: custom-wasm 9 | transformer_options: 10 | path: "examples/wasm/wasm-transformer-reverse-string.wasm" 11 | datastore: 12 | aws: 13 | bucket: replibyte-test 14 | region: us-east-2 15 | access_key_id: minioadmin 16 | secret_access_key: minioadmin 17 | endpoint: 18 | custom: 'http://localhost:9000' 19 | destination: 20 | connection_uri: mongodb://root:password@localhost:27018/?authSource=admin 21 | -------------------------------------------------------------------------------- /examples/with-transformer-options.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | transformers: 4 | - database: public 5 | table: employees 6 | columns: 7 | - name: last_name 8 | transformer_name: redacted 9 | transformer_options: 10 | character: '#' 11 | width: 12 12 | datastore: 13 | aws: 14 | bucket: replibyte-test 15 | region: us-east-2 16 | credentials: 17 | access_key_id: minioadmin 18 | secret_access_key: minioadmin 19 | endpoint: 20 | custom: 'http://localhost:9000' 21 | destination: 22 | connection_uri: postgres://root:password@localhost:5453/root 23 | -------------------------------------------------------------------------------- /examples/with-encryption.yaml: -------------------------------------------------------------------------------- 1 | encryption_key: this is a secret key 2 | source: 3 | connection_uri: postgres://root:password@localhost:5432/root 4 | transformers: 5 | - database: public 6 | table: employees 7 | columns: 8 | - name: fist_name 9 | transformer_name: first-name 10 | - name: last_name 11 | transformer_name: random 12 | destination: 13 | # it's different to the source 14 | connection_uri: postgres://root:password@localhost:5453/root 15 | datastore: 16 | aws: 17 | bucket: replibyte-test 18 | region: us-east-2 19 | credentials: 20 | access_key_id: $AWS_ACCESS_KEY_ID 21 | secret_access_key: 
$AWS_SECRET_ACCESS_KEY 22 | -------------------------------------------------------------------------------- /examples/source-postgres-bridge-minio.yaml: -------------------------------------------------------------------------------- 1 | encryption_key: 'this is a test' 2 | source: 3 | connection_uri: postgres://root:password@localhost:5432/root 4 | transformers: 5 | - database: public 6 | table: employees 7 | columns: 8 | - name: first_name 9 | transformer_name: first-name 10 | - name: last_name 11 | transformer_name: random 12 | datastore: 13 | aws: 14 | bucket: replibyte-test 15 | region: us-east-2 16 | credentials: 17 | access_key_id: minioadmin 18 | secret_access_key: minioadmin 19 | endpoint: 20 | custom: 'http://localhost:9000' 21 | destination: 22 | connection_uri: postgres://root:password@localhost:5453/root 23 | -------------------------------------------------------------------------------- /db/mongodb/init-mongo.js: -------------------------------------------------------------------------------- 1 | db.createUser({ 2 | user: 'root', 3 | pwd: 'password', 4 | roles: [ 5 | { 6 | role: 'readWrite', 7 | db: 'test', 8 | }, 9 | ], 10 | }); 11 | 12 | db = new Mongo().getDB("test"); 13 | 14 | db.createCollection('users', { capped: false }); 15 | db.createCollection('states', { capped: false }); 16 | db.createCollection('cars', { capped: false }); 17 | 18 | for (let i = 0; i < 10; i++) { 19 | db.users.insertOne({ 20 | name: 'user' + i, 21 | age: i, 22 | }); 23 | db.states.insertOne({ 24 | name: 'state' + i, 25 | number: i, 26 | }); 27 | db.cars.insertOne({ 28 | model: 'car' + i, 29 | year: 2010 + i, 30 | }); 31 | } -------------------------------------------------------------------------------- /examples/replibyte.yaml: -------------------------------------------------------------------------------- 1 | encryption_key: $ENCRYPTION_SECRET 2 | source: 3 | connection_uri: $SOURCE_CONNECTION_URI 4 | transformers: 5 | - database: public # TO CHANGE 6 | table: 
employees # TO CHANGE 7 | columns: 8 | - name: fist_name # TO CHANGE 9 | transformer_name: first-name # TO CHANGE 10 | - name: last_name # TO CHANGE 11 | transformer_name: random # TO CHANGE 12 | destination: 13 | connection_uri: $DESTINATION_CONNECTION_URI 14 | # Wipe the public schema 15 | # wipe_database: false (default: true) 16 | datastore: 17 | aws: 18 | bucket: $S3_BUCKET 19 | region: $S3_REGION 20 | credentials: 21 | access_key_id: $S3_ACCESS_KEY_ID 22 | secret_access_key: $S3_SECRET_ACCESS_KEY 23 | -------------------------------------------------------------------------------- /examples/with-skip.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | skip: 4 | - database: public 5 | table: us_states 6 | - database: public 7 | table: order_details 8 | transformers: 9 | - database: public 10 | table: employees 11 | columns: 12 | - name: fist_name 13 | transformer_name: first-name 14 | - name: last_name 15 | transformer_name: random 16 | datastore: 17 | aws: 18 | bucket: replibyte-test 19 | region: us-east-2 20 | credentials: 21 | access_key_id: minioadmin 22 | secret_access_key: minioadmin 23 | endpoint: 24 | custom: 'http://localhost:9000' 25 | destination: 26 | connection_uri: postgres://root:password@localhost:5453/root 27 | -------------------------------------------------------------------------------- /dump-parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dump-parser" 3 | version = "0.10.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | bson = "2.2" 10 | serde = "1.0" 11 | 12 | ########## WARNING ############# 13 | # DO NOT UPGRADE THE CRC CRATE # 14 | # version 2 (or higher) is not compatible with the current crc64 algorithm that 'mongorestore' uses in its 
archive parser. 15 | # mongorestore ECMA: https://go.dev/src/hash/crc64/crc64.go#L28 16 | # crc-rs 1.8 ECMA: https://github.com/mrhooray/crc-rs/blob/1.8.1/build.rs#L41 (COMPATIBLE) 17 | # crc-rs ^2.0 ECMA: https://github.com/akhilles/crc-catalog/blob/2.0.1/src/catalog.rs#L104 (INCOMPATIBLE) 18 | crc = "1.8" 19 | ################################ 20 | -------------------------------------------------------------------------------- /replibyte/src/destination/generic_stdout.rs: -------------------------------------------------------------------------------- 1 | use std::io::{stdout, Error, Write}; 2 | 3 | use crate::connector::Connector; 4 | use crate::destination::Destination; 5 | use crate::types::Bytes; 6 | 7 | /// Stream dump output on stdout 8 | pub struct GenericStdout {} 9 | 10 | impl GenericStdout { 11 | pub fn new() -> Self { 12 | GenericStdout {} 13 | } 14 | } 15 | 16 | impl Default for GenericStdout { 17 | fn default() -> Self { 18 | GenericStdout {} 19 | } 20 | } 21 | 22 | impl Connector for GenericStdout { 23 | fn init(&mut self) -> Result<(), Error> { 24 | Ok(()) 25 | } 26 | } 27 | 28 | impl<'a> Destination for GenericStdout { 29 | fn write(&self, data: Bytes) -> Result<(), Error> { 30 | let mut stdout = stdout(); 31 | let _ = stdout.write_all(data.as_slice()); 32 | Ok(()) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /website/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 
10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 18 | 19 | // But you can create a sidebar manually 20 | /* 21 | tutorialSidebar: [ 22 | { 23 | type: 'category', 24 | label: 'Tutorial', 25 | items: ['hello'], 26 | }, 27 | ], 28 | */ 29 | }; 30 | 31 | module.exports = sidebars; 32 | -------------------------------------------------------------------------------- /docker-compose-postgres.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-postgres: 5 | image: postgres:13 6 | restart: always 7 | healthcheck: 8 | test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ] 9 | timeout: 45s 10 | interval: 10s 11 | retries: 10 12 | environment: 13 | - POSTGRES_USER=root 14 | - POSTGRES_PASSWORD=password 15 | volumes: 16 | - ./db/postgres:/docker-entrypoint-initdb.d/ 17 | ports: 18 | - 5432:5432 19 | dest-postgres: 20 | image: postgres:13 21 | restart: always 22 | healthcheck: 23 | test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ] 24 | timeout: 45s 25 | interval: 10s 26 | retries: 10 27 | environment: 28 | - POSTGRES_USER=root 29 | - POSTGRES_PASSWORD=password 30 | ports: 31 | - 5453:5432 32 | -------------------------------------------------------------------------------- /examples/with-subset-and-transformer.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | connection_uri: postgres://root:password@localhost:5432/root 3 | database_subset: 4 | database: public 5 | table: orders 6 | strategy_name: random 7 | strategy_options: 8 | percent: 50 9 | passthrough_tables: 10 | - us_states 11 | transformers: 12 | - database: public 13 | table: employees 14 | columns: 15 | - name: last_name 16 | 
transformer_name: redacted 17 | transformer_options: 18 | character: '#' 19 | width: 12 20 | datastore: 21 | aws: 22 | bucket: replibyte-test 23 | region: us-east-2 24 | credentials: 25 | access_key_id: minioadmin 26 | secret_access_key: minioadmin 27 | endpoint: 28 | custom: 'http://localhost:9000' 29 | destination: 30 | connection_uri: postgres://root:password@localhost:5453/root 31 | -------------------------------------------------------------------------------- /docker-compose-mongodb.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-mongodb: 5 | image: mongo:5 6 | restart: always 7 | healthcheck: 8 | test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet 9 | timeout: 45s 10 | interval: 10s 11 | retries: 10 12 | environment: 13 | - MONGO_INITDB_ROOT_USERNAME=root 14 | - MONGO_INITDB_ROOT_PASSWORD=password 15 | volumes: 16 | - ./db/mongo:/docker-entrypoint-initdb.d/ 17 | ports: 18 | - 27017:27017 19 | dest-mongodb: 20 | image: mongo:5 21 | restart: always 22 | healthcheck: 23 | test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet 24 | timeout: 45s 25 | interval: 10s 26 | retries: 10 27 | environment: 28 | - MONGO_INITDB_ROOT_USERNAME=root 29 | - MONGO_INITDB_ROOT_PASSWORD=password 30 | ports: 31 | - 27018:27017 -------------------------------------------------------------------------------- /dump-parser/README.md: -------------------------------------------------------------------------------- 1 | # Dump Parser 2 | 3 | Library to parse and edit database dump for Postgres, MySQL and MongoDB. 
4 | 5 | 6 | Example for Postgres 7 | ```rust 8 | let q = r" 9 | INSERT INTO public.customers (customer_id, company_name, contact_name, contact_title) 10 | VALUES (1, 'Alfreds Futterkiste', 'Maria Anders', NULL); 11 | "; 12 | 13 | let mut tokenizer = Tokenizer::new(q); 14 | let tokens_result = tokenizer.tokenize(); 15 | assert_eq!(tokens_result.is_ok(), true); 16 | 17 | let tokens = trim_pre_whitespaces(tokens_result.unwrap()); 18 | let column_values = get_column_values_from_insert_into_query(&tokens); 19 | 20 | assert_eq!( 21 | column_values, 22 | vec![ 23 | &Token::Number("1".to_string(), false), 24 | &Token::SingleQuotedString("Alfreds Futterkiste".to_string()), 25 | &Token::SingleQuotedString("Maria Anders".to_string()), 26 | &Token::make_keyword("NULL"), 27 | ] 28 | ); 29 | ``` 30 | -------------------------------------------------------------------------------- /replibyte/src/source/mysql_stdin.rs: -------------------------------------------------------------------------------- 1 | use std::io::{stdin, BufReader, Error}; 2 | 3 | use crate::connector::Connector; 4 | use crate::source::mysql::read_and_transform; 5 | use crate::types::{OriginalQuery, Query}; 6 | use crate::Source; 7 | use crate::SourceOptions; 8 | 9 | /// Source MySQL dump from STDIN 10 | pub struct MysqlStdin {} 11 | 12 | impl Default for MysqlStdin { 13 | fn default() -> Self { 14 | Self {} 15 | } 16 | } 17 | 18 | impl Connector for MysqlStdin { 19 | fn init(&mut self) -> Result<(), Error> { 20 | Ok(()) 21 | } 22 | } 23 | 24 | impl Source for MysqlStdin { 25 | fn read( 26 | &self, 27 | options: SourceOptions, 28 | query_callback: F, 29 | ) -> Result<(), Error> { 30 | let reader = BufReader::new(stdin()); 31 | read_and_transform(reader, options, query_callback); 32 | 33 | Ok(()) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /website/README.md: -------------------------------------------------------------------------------- 1 | # Replibyte 
Website 2 | 3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER= yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 42 | -------------------------------------------------------------------------------- /replibyte/src/source/mod.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | 3 | use crate::config::{DatabaseSubsetConfig, OnlyTablesConfig, SkipConfig}; 4 | use crate::connector::Connector; 5 | use crate::transformer::Transformer; 6 | use crate::types::{OriginalQuery, Query}; 7 | 8 | pub mod mongodb; 9 | pub mod mongodb_stdin; 10 | pub mod mysql; 11 | pub mod mysql_stdin; 12 | pub mod postgres; 13 | pub mod postgres_stdin; 14 | 15 | pub trait Explain: Connector { 16 | fn schema(&self) -> Result<(), Error>; 17 | } 18 | 19 | pub trait Source: Connector { 20 | fn read( 21 | &self, 22 | options: SourceOptions, 23 | query_callback: F, 24 | ) -> Result<(), Error>; 25 | } 26 | 27 | pub struct SourceOptions<'a> { 28 | pub transformers: &'a Vec>, 29 | pub skip_config: &'a Vec, 30 | pub database_subset: &'a Option, 31 | pub only_tables: &'a Vec, 32 | } 33 | 
-------------------------------------------------------------------------------- /docker-compose-mysql.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-mysql: 5 | platform: linux/x86_64 6 | image: mysql:8 7 | restart: always 8 | command: --default-authentication-plugin=mysql_native_password 9 | healthcheck: 10 | test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password'] 11 | timeout: 45s 12 | interval: 10s 13 | retries: 10 14 | environment: 15 | - MYSQL_ROOT_PASSWORD=password 16 | volumes: 17 | - ./db/mysql:/docker-entrypoint-initdb.d 18 | ports: 19 | - 3306:3306 20 | dest-mysql: 21 | platform: linux/x86_64 22 | image: mysql:8 23 | restart: always 24 | command: --default-authentication-plugin=mysql_native_password 25 | healthcheck: 26 | test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password'] 27 | timeout: 45s 28 | interval: 10s 29 | retries: 10 30 | environment: 31 | - MYSQL_ROOT_PASSWORD=password 32 | ports: 33 | - 3307:3306 34 | -------------------------------------------------------------------------------- /.github/workflows/publish-image.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: 4 | - published 5 | 6 | jobs: 7 | build-linux: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | packages: write 11 | 12 | steps: 13 | - name: Set up Docker Buildx 14 | uses: docker/setup-buildx-action@v2 15 | 16 | - uses: docker/login-action@v2 17 | with: 18 | registry: ghcr.io 19 | username: ${{ github.actor }} 20 | password: ${{ secrets.GITHUB_TOKEN }} 21 | 22 | - id: metadata 23 | uses: docker/metadata-action@v3 24 | with: 25 | images: ghcr.io/${{ github.repository }} 26 | tags: | 27 | type=semver,pattern={{version}},value=${{ github.event.release.tag_name }} 28 | # shortcut to create `latest` tag 29 | flavor: latest=true 30 | 31 | - uses: docker/build-push-action@v3 32 | with: 33 | push: true 
34 | tags: ${{ steps.metadata.outputs.tags }} 35 | labels: ${{ steps.metadata.outputs.labels }} 36 | 37 | -------------------------------------------------------------------------------- /replibyte/src/source/mongodb_stdin.rs: -------------------------------------------------------------------------------- 1 | use std::io::{stdin, BufReader, Error}; 2 | 3 | use crate::connector::Connector; 4 | use crate::source::mongodb::read_and_transform; 5 | use crate::types::{OriginalQuery, Query}; 6 | use crate::Source; 7 | use crate::SourceOptions; 8 | 9 | pub struct MongoDBStdin {} 10 | 11 | impl Default for MongoDBStdin { 12 | fn default() -> Self { 13 | Self {} 14 | } 15 | } 16 | 17 | impl Connector for MongoDBStdin { 18 | fn init(&mut self) -> Result<(), Error> { 19 | Ok(()) 20 | } 21 | } 22 | 23 | impl Source for MongoDBStdin { 24 | fn read( 25 | &self, 26 | options: SourceOptions, 27 | query_callback: F, 28 | ) -> Result<(), Error> { 29 | let reader = BufReader::new(stdin()); 30 | 31 | if let Some(_database_subset) = &options.database_subset { 32 | todo!("database subset not supported yet for MongoDB source") 33 | } 34 | 35 | let _ = read_and_transform(reader, options, query_callback)?; 36 | Ok(()) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /website/src/components/HomepageFeatures/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import styles from './styles.module.css'; 4 | 5 | type FeatureItem = { 6 | title: string; 7 | Svg: React.ComponentType>; 8 | description: JSX.Element; 9 | }; 10 | 11 | const FeatureList: FeatureItem[] = [ 12 | ]; 13 | 14 | function Feature({title, Svg, description}: FeatureItem) { 15 | return ( 16 |
17 |
18 | 19 |
20 |
21 |

{title}

22 |

{description}

23 |
24 |
25 | ); 26 | } 27 | 28 | export default function HomepageFeatures(): JSX.Element { 29 | return ( 30 |
31 |
32 |
33 | {FeatureList.map((props, idx) => ( 34 | 35 | ))} 36 |
37 |
38 |
39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /replibyte/src/source/postgres_stdin.rs: -------------------------------------------------------------------------------- 1 | use std::io::{stdin, BufReader, Error}; 2 | 3 | use crate::connector::Connector; 4 | use crate::source::postgres::{read_and_transform, subset}; 5 | use crate::types::{OriginalQuery, Query}; 6 | use crate::Source; 7 | use crate::SourceOptions; 8 | 9 | /// Source Postgres dump from STDIN 10 | pub struct PostgresStdin {} 11 | 12 | impl Default for PostgresStdin { 13 | fn default() -> Self { 14 | Self {} 15 | } 16 | } 17 | 18 | impl Connector for PostgresStdin { 19 | fn init(&mut self) -> Result<(), Error> { 20 | Ok(()) 21 | } 22 | } 23 | 24 | impl Source for PostgresStdin { 25 | fn read( 26 | &self, 27 | options: SourceOptions, 28 | query_callback: F, 29 | ) -> Result<(), Error> { 30 | match &options.database_subset { 31 | None => { 32 | let reader = BufReader::new(stdin()); 33 | read_and_transform(reader, options, query_callback); 34 | } 35 | Some(subset_config) => { 36 | let dump_reader = BufReader::new(stdin()); 37 | let reader = subset(dump_reader, subset_config)?; 38 | read_and_transform(reader, options, query_callback); 39 | } 40 | }; 41 | 42 | Ok(()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /website/docs/guides/4-delete-a-dump.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: IV. Delete a dump 3 | sidebar_position: 4 4 | --- 5 | 6 | # Delete a dump 7 | 8 | The `dump delete` command comes with 3 different deleting strategies. 9 | 10 | 1. Delete a dump by its name 11 | 2. Delete dumps older than a specified number of days 12 | 3. Keep only a maximum number of dumps 13 | 14 | ### Delete by dump name 15 | 16 | ```shell 17 | replibyte -c conf.yaml dump delete 18 | ``` 19 | 20 | This is the simplest strategy you can find. 
21 | 22 | The list of available dumps can be retrieved by running the following command: 23 | 24 | ```shell 25 | replibyte -c conf.yaml dump list 26 | 27 | type name size when compressed encrypted 28 | PostgreSQL dump-1647706359405 154MB Yesterday at 03:00 am true true 29 | PostgreSQL dump-1647731334517 152MB 2 days ago at 03:00 am true true 30 | PostgreSQL dump-1647734369306 149MB 3 days ago at 03:00 am true true 31 | ``` 32 | 33 | ### Delete dumps older than 2 days 34 | 35 | ```shell 36 | replibyte -c conf.yaml dump delete --older-than=2d 37 | ``` 38 | 39 | Only the day unit is supported for now, other units could come in the future. 40 | 41 | ### Keep only the last 10 dumps 42 | 43 | ```shell 44 | replibyte -c conf.yaml dump delete --keep-last=10 45 | ``` 46 | -------------------------------------------------------------------------------- /website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "replibyte", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids", 15 | "typecheck": "tsc" 16 | }, 17 | "dependencies": { 18 | "@docusaurus/core": "2.0.0-beta.18", 19 | "@docusaurus/preset-classic": "2.0.0-beta.18", 20 | "@mdx-js/react": "^1.6.22", 21 | "clsx": "^1.1.1", 22 | "prism-react-renderer": "^1.3.1", 23 | "react": "^17.0.2", 24 | "react-dom": "^17.0.2", 25 | "mermaid": "^8.11.5", 26 | "mdx-mermaid": "^1.2.2", 27 | "@cmfcmf/docusaurus-search-local": "^0.10.0" 28 | }, 29 | "devDependencies": { 30 | "@docusaurus/module-type-aliases": "2.0.0-beta.18", 31 | "@tsconfig/docusaurus": "^1.0.5", 32 | "typescript": "^4.6.3" 33 | }, 34 | 
"browserslist": { 35 | "production": [ 36 | ">0.5%", 37 | "not dead", 38 | "not op_mini all" 39 | ], 40 | "development": [ 41 | "last 1 chrome version", 42 | "last 1 firefox version", 43 | "last 1 safari version" 44 | ] 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /website/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import Link from '@docusaurus/Link'; 4 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; 5 | import styles from './index.module.css'; 6 | import {Redirect} from "@docusaurus/router"; 7 | 8 | function HomepageHeader() { 9 | const {siteConfig} = useDocusaurusContext(); 10 | return ( 11 |
12 |
13 |

{siteConfig.title}

14 |

{siteConfig.tagline}

15 |
16 | 19 | Getting Started 20 | 21 |
22 |
23 |
24 | ); 25 | } 26 | 27 | // export default function Home(): JSX.Element { 28 | // const {siteConfig} = useDocusaurusContext(); 29 | // return ( 30 | // 33 | // 34 | //
35 | // 36 | //
37 | //
38 | // ); 39 | // } 40 | 41 | export default function Home() { 42 | return ; 43 | } 44 | -------------------------------------------------------------------------------- /replibyte/src/transformer/transient.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::Transformer; 2 | use crate::types::Column; 3 | 4 | /// This transformer will not make any changes. 5 | pub struct TransientTransformer { 6 | database_name: String, 7 | table_name: String, 8 | column_name: String, 9 | } 10 | 11 | impl Default for TransientTransformer { 12 | fn default() -> Self { 13 | TransientTransformer { 14 | database_name: String::default(), 15 | table_name: String::default(), 16 | column_name: String::default(), 17 | } 18 | } 19 | } 20 | 21 | impl TransientTransformer { 22 | pub fn new(database_name: S, table_name: S, column_name: S) -> Self 23 | where 24 | S: Into, 25 | { 26 | TransientTransformer { 27 | table_name: table_name.into(), 28 | column_name: column_name.into(), 29 | database_name: database_name.into(), 30 | } 31 | } 32 | } 33 | 34 | impl Transformer for TransientTransformer { 35 | fn id(&self) -> &str { 36 | "transient" 37 | } 38 | 39 | fn description(&self) -> &str { 40 | "Does not modify the value." 41 | } 42 | 43 | fn database_name(&self) -> &str { 44 | self.database_name.as_str() 45 | } 46 | 47 | fn table_name(&self) -> &str { 48 | self.table_name.as_str() 49 | } 50 | 51 | fn column_name(&self) -> &str { 52 | self.column_name.as_str() 53 | } 54 | 55 | fn transform(&self, column: Column) -> Column { 56 | column 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /website/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 
5 | */ 6 | 7 | /* You can override the default Infima variables here. */ 8 | :root { 9 | --ifm-color-primary: #5B50D6; 10 | --ifm-color-primary-dark: #433AB8; 11 | --ifm-color-primary-darker: #2F289A; 12 | --ifm-color-primary-darkest: #130F66; 13 | --ifm-color-primary-light: #847AE6; 14 | --ifm-color-primary-lighter: #C2BCFA; 15 | --ifm-color-primary-lightest: #F2F3FE; 16 | --ifm-footer-background-color: #f8f8fd; 17 | --ifm-code-font-size: 95%; 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. */ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #C2BCFA; 23 | --ifm-color-primary-dark: #5B50D6; 24 | --ifm-color-primary-darker: #433AB8; 25 | --ifm-color-primary-darkest: #2F289A; 26 | --ifm-color-primary-light: #C2BCFA; 27 | --ifm-color-primary-lighter: #E0DDFC; 28 | --ifm-color-primary-lightest: #F2F3FE; 29 | --ifm-footer-background-color: #151B2B; 30 | } 31 | 32 | .docusaurus-highlight-code-line { 33 | background-color: #090c13; 34 | display: block; 35 | margin: 0 calc(-1 * var(--ifm-pre-padding)); 36 | padding: 0 var(--ifm-pre-padding); 37 | } 38 | 39 | [data-theme='dark'] .docusaurus-highlight-code-line { 40 | background-color: #151B2B; 41 | } 42 | 43 | [data-theme='dark'] .navbar { 44 | background-color: #151B2B; 45 | } 46 | 47 | [data-theme='dark'] { 48 | background-color: #151B2B; 49 | } 50 | -------------------------------------------------------------------------------- /dump-parser/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::io::{BufReader, Read}; 2 | 3 | use crate::errors::DumpFileError; 4 | 5 | pub mod errors; 6 | pub mod mongodb; 7 | pub mod mysql; 8 | pub mod postgres; 9 | pub mod utils; 10 | 11 | #[derive(Debug, PartialOrd, PartialEq, Ord, Eq)] 12 | pub enum Type { 13 | Postgres, 14 | Mysql, 15 | } 16 | 17 | pub trait LogicalDatabase<'a, T> 18 | where 19 | T: Table, 20 | { 21 | fn name(&self) -> &str; 22 | fn tables(&self) -> Result, 
DumpFileError>; 23 | } 24 | 25 | pub trait Table { 26 | fn rows(&self) -> &'static Vec; 27 | } 28 | 29 | #[derive(Debug, Hash, Eq, PartialEq)] 30 | pub struct Row { 31 | columns: Vec, 32 | } 33 | 34 | #[derive(Debug, Hash, Eq, PartialEq)] 35 | pub struct Column { 36 | name: String, 37 | value: Vec, 38 | } 39 | 40 | pub trait Database<'a, LD, T> 41 | where 42 | LD: LogicalDatabase<'a, T>, 43 | T: Table, 44 | { 45 | fn database_type(&self) -> Type; 46 | /// list logical databases available 47 | fn databases(&self, dump_reader: BufReader) -> Result, DumpFileError>; 48 | /// find a logical database by name 49 | fn get_database, R: Read>( 50 | &self, 51 | name: S, 52 | dump_reader: BufReader, 53 | ) -> Result, DumpFileError> { 54 | let databases = self.databases(dump_reader)?; 55 | 56 | let db_name = name.into(); 57 | for db in databases { 58 | if db.name() == db_name { 59 | return Ok(Some(db)); 60 | } 61 | } 62 | 63 | Ok(None) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /replibyte/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2021" 3 | version = "0.10.0" 4 | name = "replibyte" 5 | authors = ["Qovery Team", "Fab", "Benny", "Contributos"] 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | dump-parser = { path = "../dump-parser" } 11 | subset = { path = "../subset" } 12 | rand = "0.8.5" 13 | anyhow = "1.0.56" 14 | serde_yaml = "0.8" 15 | serde_json = "1.0" 16 | aws-config = "0.9.0" 17 | aws-smithy-client = "0.39.0" 18 | aws-smithy-http = "0.39.0" 19 | aws-sdk-s3 = "0.9.0" 20 | aws-types = "0.9.0" 21 | tokio = { version = "1", features = ["full"] } 22 | rustls = "0.20.4" 23 | clap = { version = "3.1", features = ["derive"] } 24 | serde = { version = "1.0", features = ["derive"] } 25 | lazy_static = "1.4.0" 26 | fake = "2.4" 27 | log = "0.4" 28 | env_logger = 
"0.9" 29 | prettytable-rs = "0.8" 30 | timeago = "0.3" 31 | indicatif = "0.16" 32 | http = "0.2" 33 | flate2 = "1.0" 34 | bson = "2.2" 35 | aes-gcm = "0.9" 36 | which = "4.2.5" 37 | mongodb-schema-parser = { git = "https://github.com/mongodb-rust/mongodb-schema-parser.git", rev = "2d489307dd70b63b216a9968f7dec7c217108b32" } 38 | url = "2.2.2" 39 | tempfile = "3.3" 40 | ctrlc = "3.2.1" 41 | reqwest = { version = "0.11", features = ["blocking"] } 42 | chrono = {version = "0.4", features = ["serde"] } 43 | machine-uid = "0.2" 44 | percent-encoding = "2.1.0" 45 | 46 | # FIXME removed until the CI release pipeline is fixed 47 | #wasmer = { version = "2.2", optional = true } 48 | wasmer = { version = "2.2" } 49 | # FIXME same as above 50 | #wasmer-wasi = { version = "2.2", optional = true } 51 | wasmer-wasi = { version = "2.2" } 52 | 53 | # FIXME same as above 54 | #[features] 55 | #wasm = ["wasmer", "wasmer-wasi"] 56 | -------------------------------------------------------------------------------- /docker-compose-postgres-minio.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-postgres: 5 | image: postgres:13 6 | restart: always 7 | healthcheck: 8 | test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ] 9 | timeout: 45s 10 | interval: 10s 11 | retries: 10 12 | environment: 13 | - POSTGRES_USER=root 14 | - POSTGRES_PASSWORD=password 15 | volumes: 16 | - ./db/postgres:/docker-entrypoint-initdb.d/ 17 | ports: 18 | - 5432:5432 19 | dest-postgres: 20 | image: postgres:13 21 | restart: always 22 | healthcheck: 23 | test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ] 24 | timeout: 45s 25 | interval: 10s 26 | retries: 10 27 | environment: 28 | - POSTGRES_USER=root 29 | - POSTGRES_PASSWORD=password 30 | ports: 31 | - 5453:5432 32 | bridge-minio: 33 | image: minio/minio:RELEASE.2022-03-17T06-34-49Z 34 | restart: always 35 | command: server --console-address ":9001" 
/data/minio/ 36 | healthcheck: 37 | test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] 38 | interval: 30s 39 | timeout: 20s 40 | retries: 3 41 | environment: 42 | MINIO_ROOT_USER: minioadmin 43 | MINIO_ROOT_PASSWORD: minioadmin 44 | ports: 45 | - 9000:9000 46 | - 9001:9001 47 | create-minio-bucket: 48 | image: minio/mc 49 | depends_on: 50 | - bridge-minio 51 | entrypoint: > 52 | /bin/sh -c " 53 | /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin; 54 | /usr/bin/mc rm -r --force myminio/replibyte-test; 55 | /usr/bin/mc mb myminio/replibyte-test; 56 | /usr/bin/mc policy download myminio/replibyte-test; 57 | exit 0; 58 | " 59 | -------------------------------------------------------------------------------- /website/docs/contributing.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 11 3 | --- 4 | 5 | # Contributing 6 | 7 | :::tip 8 | 9 | If you are non-experienced in Rust, consider picking issues with label [good first issue](https://github.com/Qovery/replibyte/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). 10 | 11 | ::: 12 | 13 | ## Local development 14 | 15 | For local development, you will need to: 16 | 17 | 1. Install [Rust](https://www.rust-lang.org/). 18 | 2. Install [Docker](https://www.docker.com). 19 | 3. Run `docker compose -f ./docker-compose-dev.yml up` to 20 | start the local databases. At the moment, `docker-compose` includes 2 PostgreSQL database instances, 2 MySQL instances, 2 MongoDB instances 21 | and a [MinIO](https://min.io) datastore. One source, one destination by database and one datastore. In the future, we will provide more options. 22 | 23 | The Minio console is accessible at http://localhost:9001. 
24 | 25 | Once your Docker instances are running, you can run the RepliByte tests, to check if everything is configured correctly: 26 | 27 | ```shell 28 | AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin cargo test 29 | ``` 30 | 31 | To check that your development environment works well, you can run all the tests locally with: 32 | 33 | ```shell 34 | cargo test --all 35 | ``` 36 | 37 | ## How to contribute 38 | 39 | RepliByte is in its early stage of development and need some time to be usable in production. We need some help, and you are welcome to 40 | contribute. To better synchronize consider joining our #replibyte channel on our [Discord](https://discord.qovery.com). Otherwise, you can 41 | pick [opened issues](https://github.com/Qovery/replibyte/issues) and contribute. 42 | 43 | ## Where should I start? 44 | 45 | Check [opened issues](https://github.com/Qovery/replibyte/issues). 46 | 47 | -------------------------------------------------------------------------------- /docker-compose-mongodb-minio.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-mongodb: 5 | image: mongo:5 6 | restart: always 7 | healthcheck: 8 | test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet 9 | timeout: 45s 10 | interval: 10s 11 | retries: 10 12 | environment: 13 | - MONGO_INITDB_ROOT_USERNAME=root 14 | - MONGO_INITDB_ROOT_PASSWORD=password 15 | volumes: 16 | - ./db/mongodb:/docker-entrypoint-initdb.d/ 17 | ports: 18 | - 27017:27017 19 | dest-mongodb: 20 | image: mongo:5 21 | restart: always 22 | healthcheck: 23 | test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet 24 | timeout: 45s 25 | interval: 10s 26 | retries: 10 27 | environment: 28 | - MONGO_INITDB_ROOT_USERNAME=root 29 | - MONGO_INITDB_ROOT_PASSWORD=password 30 | ports: 31 | - 27018:27017 32 | bridge-minio: 33 | image: minio/minio:RELEASE.2022-03-17T06-34-49Z 34 | restart: always 35 | 
command: server --console-address ":9001" /data/minio/ 36 | healthcheck: 37 | test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] 38 | interval: 30s 39 | timeout: 20s 40 | retries: 3 41 | environment: 42 | MINIO_ROOT_USER: minioadmin 43 | MINIO_ROOT_PASSWORD: minioadmin 44 | ports: 45 | - 9000:9000 46 | - 9001:9001 47 | create-minio-bucket: 48 | image: minio/mc 49 | depends_on: 50 | - bridge-minio 51 | entrypoint: > 52 | /bin/sh -c " 53 | /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin; 54 | /usr/bin/mc rm -r --force myminio/replibyte-test; 55 | /usr/bin/mc mb myminio/replibyte-test; 56 | /usr/bin/mc policy download myminio/replibyte-test; 57 | exit 0; 58 | " -------------------------------------------------------------------------------- /website/docs/guides/3-subset-a-dump.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: III. Subset a dump 3 | sidebar_position: 3 4 | --- 5 | 6 | # Subset a dump 7 | 8 | :::caution 9 | 10 | Only PostgreSQL supports *Subsetting* at the moment. Feel free to [contribute](/docs/contributing) to accelerate the support of MySQL and MongoDB 11 | 12 | ::: 13 | 14 | Subsetting is a powerful feature to only import a smaller consistent part from your production database. 15 | 16 | ## How Subsetting works 17 | 18 | Check out how subsetting works under the hood [here](/docs/design/how-database-subset-works). 
19 | 20 | ## Configuration 21 | 22 | Using Subsetting feature is as simple as adding new parameters in your `conf.yaml` 23 | 24 | ```yaml title="add database_subset object" 25 | source: 26 | connection_uri: postgres://user:password@host:port/db 27 | transformers: 28 | - database: public 29 | table: customers 30 | columns: 31 | - name: first_name 32 | transformer_name: first-name 33 | - name: last_name 34 | transformer_name: random 35 | - name: contact_phone 36 | transformer_name: phone-number 37 | - name: contact_email 38 | transformer_name: email 39 | database_subset: 40 | database: public 41 | table: customers 42 | strategy_name: random 43 | strategy_options: 44 | percent: 10 45 | passthrough_tables: 46 | - product_catalog 47 | ``` 48 | 49 | By applying this configuration, Replibyte will: 50 | 51 | * Keep around 10% of the full database 52 | * Go down the whole tables linked to `public.customers` 53 | * Keep the whole rows from product_catalog 54 | 55 | ## Subset Strategy 56 | 57 | TODO 58 | 59 | ## Considerations 60 | 61 | This feature is still under active improvement. Feel free to [open an issue](https://github.com/Qovery/Replibyte/issues/new) if you face any trouble. 
62 | -------------------------------------------------------------------------------- /docker-compose-mysql-minio.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-mysql: 5 | image: mysql:8 6 | restart: always 7 | command: --default-authentication-plugin=mysql_native_password 8 | healthcheck: 9 | test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password'] 10 | timeout: 45s 11 | interval: 10s 12 | retries: 10 13 | environment: 14 | - MYSQL_ROOT_PASSWORD=password 15 | volumes: 16 | - ./db/mysql:/docker-entrypoint-initdb.d 17 | ports: 18 | - 3306:3306 19 | dest-mysql: 20 | image: mysql:8 21 | restart: always 22 | command: --default-authentication-plugin=mysql_native_password 23 | healthcheck: 24 | test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password'] 25 | timeout: 45s 26 | interval: 10s 27 | retries: 10 28 | environment: 29 | - MYSQL_ROOT_PASSWORD=password 30 | ports: 31 | - 3307:3306 32 | bridge-minio: 33 | image: minio/minio:RELEASE.2022-03-17T06-34-49Z 34 | restart: always 35 | command: server --console-address ":9001" /data/minio/ 36 | healthcheck: 37 | test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] 38 | interval: 30s 39 | timeout: 20s 40 | retries: 3 41 | environment: 42 | MINIO_ROOT_USER: minioadmin 43 | MINIO_ROOT_PASSWORD: minioadmin 44 | ports: 45 | - 9000:9000 46 | - 9001:9001 47 | create-minio-bucket: 48 | image: minio/mc 49 | depends_on: 50 | - bridge-minio 51 | entrypoint: > 52 | /bin/sh -c " 53 | /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin; 54 | /usr/bin/mc rm -r --force myminio/replibyte-test; 55 | /usr/bin/mc mb myminio/replibyte-test; 56 | /usr/bin/mc policy download myminio/replibyte-test; 57 | exit 0; 58 | " 59 | -------------------------------------------------------------------------------- /website/docs/faq.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 12 3 | --- 4 | 5 | # FAQ 6 | 7 | :::tip 8 | 9 | [Open an issue](https://github.com/Qovery/replibyte/issues/new) if you don't find the answer to your question. 10 | 11 | ::: 12 | 13 | ### What language is used for Replibyte? 14 | 15 | [Rust](https://www.rust-lang.org/) 16 | 17 | ### Why using Rust? 18 | 19 | Replibyte is a IO intensive tool that need to process data as fast as possible. Rust is a perfect candidate for high throughput and low 20 | memory consumption. 21 | 22 | ### Does RepliByte is an ETL? 23 | 24 | RepliByte is not an ETL like [AirByte](https://github.com/airbytehq/airbyte), [AirFlow](https://airflow.apache.org/), Talend, and it will 25 | never be. If you need to synchronize versatile data sources, you are better choosing a classic ETL. RepliByte is a tool for software 26 | engineers to help them to synchronize data from the same databases. With RepliByte, you can only replicate data from the same type of 27 | databases. As mentioned above, the primary purpose of RepliByte is to duplicate into different environments. You can see RepliByte as a 28 | specific use case of an ETL, where an ETL is more generic. 29 | 30 | ### Do you support backup from a dump file? 31 | 32 | absolutely, 33 | 34 | ```shell 35 | cat dump.sql | replibyte -c conf.yaml backup run -s postgres -i 36 | ``` 37 | 38 | and 39 | 40 | ```shell 41 | replibyte -c conf.yaml backup run -s postgres -f dump.sql 42 | ``` 43 | 44 | ### How RepliByte can list the dumps? Is there an API? 45 | 46 | There is no API, RepliByte is fully stateless and store the dump list into the datastore (E.g. S3) via an metadata file. 47 | 48 | ### How can I contact you? 49 | 50 | 3 options: 51 | 52 | 1. [Open an issue](https://github.com/Qovery/replibyte/issues/new). 53 | 2. Join our #replibyte channel on [our discord](https://discord.qovery.com). 54 | 3. 
Drop us an email to `github+replibyte {at} qovery {dot} com`. 55 | -------------------------------------------------------------------------------- /subset/src/dedup.rs: -------------------------------------------------------------------------------- 1 | use std::fs::{File, OpenOptions}; 2 | use std::io::{BufRead, BufReader, Error, Write}; 3 | use std::path::Path; 4 | 5 | pub type Line<'a> = &'a str; 6 | pub type GroupHash = String; 7 | 8 | /// Create or find the appropriate file based on the `group_hash` and append the line if it does not already exist. 9 | pub fn does_line_exist_and_set( 10 | temp_directory: &Path, 11 | group_hash: &GroupHash, 12 | line: Line, 13 | ) -> Result { 14 | if does_line_exist(temp_directory, group_hash, line)? { 15 | return Ok(true); 16 | } 17 | 18 | let file_path = temp_directory.join(group_hash); 19 | 20 | // append the line because it does not exist 21 | let mut file = OpenOptions::new() 22 | .write(true) 23 | .append(true) 24 | .truncate(false) 25 | .open(file_path.as_path())?; 26 | 27 | let line = format!("{}\n", line.trim_start().trim_end()); 28 | let _ = file.write(line.as_bytes())?; 29 | 30 | Ok(false) 31 | } 32 | 33 | pub fn does_line_exist( 34 | temp_directory: &Path, 35 | group_hash: &GroupHash, 36 | line: Line, 37 | ) -> Result { 38 | let file_path = temp_directory.join(group_hash); 39 | let file = match File::open(file_path.as_path()) { 40 | Ok(file) => file, 41 | Err(_) => File::create(file_path.as_path())?, 42 | }; 43 | 44 | let mut buf = String::new(); 45 | let mut reader = BufReader::new(&file); 46 | // remove potential whitespaces and \n 47 | let line = line.trim_start().trim_end(); 48 | while let Ok(amount) = reader.read_line(&mut buf) { 49 | if amount == 0 { 50 | // EOF 51 | break; 52 | } 53 | 54 | if buf.as_str().trim_start().trim_end() == line { 55 | // the line already exist in the file, we can stop here 56 | return Ok(true); 57 | } 58 | 59 | let _ = buf.clear(); 60 | } 61 | 62 | Ok(false) 63 | } 64 | 
-------------------------------------------------------------------------------- /replibyte/src/commands/source.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind}; 2 | 3 | use crate::config::{Config, ConnectionUri}; 4 | use crate::source::Explain; 5 | use crate::source::mongodb::MongoDB; 6 | use crate::source::mysql::Mysql; 7 | use crate::source::postgres::Postgres; 8 | 9 | /// show the database schema 10 | pub fn schema(config: Config) -> anyhow::Result<()> { 11 | match config.source { 12 | Some(source) => { 13 | match source.connection_uri()? { 14 | ConnectionUri::Postgres(host, port, username, password, database) => { 15 | let postgres = Postgres::new( 16 | host.as_str(), 17 | port, 18 | database.as_str(), 19 | username.as_str(), 20 | password.as_str(), 21 | ); 22 | 23 | postgres.schema()?; 24 | 25 | Ok(()) 26 | } 27 | ConnectionUri::Mysql(host, port, username, password, database) => { 28 | let mysql = Mysql::new( 29 | host.as_str(), 30 | port, 31 | database.as_str(), 32 | username.as_str(), 33 | password.as_str(), 34 | ); 35 | 36 | mysql.schema()?; 37 | 38 | Ok(()) 39 | } 40 | ConnectionUri::MongoDB(uri, database) => { 41 | let mongodb = MongoDB::new(uri.as_str(), database.as_str()); 42 | 43 | mongodb.schema()?; 44 | 45 | Ok(()) 46 | } 47 | } 48 | } 49 | None => { 50 | Err(anyhow::Error::from(Error::new( 51 | ErrorKind::Other, 52 | "missing object in the configuration file", 53 | ))) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /website/docs/getting-started/concepts.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 3 3 | --- 4 | 5 | # Concepts 6 | 7 | To better use Replibyte, here are a list of the different concept to have in mind. 
8 | 9 | :::tip 10 | 11 | I assume you read "[How Replibyte works](/docs/how-replibyte-works)" 12 | 13 | ::: 14 | 15 | ```mermaid 16 | flowchart LR 17 | Source --> Subset --> Transformer --> Datastore --> Destination 18 | ``` 19 | 20 | ## Source 21 | 22 | A source is a database from where Replibyte will create the dump. The database dump can be provided from a file, as a process 23 | input or be created by Replibyte. 24 | 25 | ## Subset 26 | 27 | Subsetting data is the process of taking a representative sample of your data in a manner that preserves the integrity of your database, e.g., give me 5% of all transactions or pull all data associated with customers who live in California. If you do this naively, your database will break foreign key constraints, or you’ll end up with a statistically non-representative data sample. Here are a few situations in which you might find subsetting data to be important or necessary: 28 | 1. You’d like to use your production database in staging or test environments but the database is very large so you want to use only a portion of it. 29 | 2. You’d like a test database that contains a few specific rows from production (and related rows from other tables) so you can reproduce a bug. 30 | 3. You want to share data with others, but you don’t want them to have all of it. A common scenario is providing developers an anonymized subset which also enables them to run the test database locally on their own machines. 31 | 32 | ## Transformer 33 | 34 | A transformer is a module to alternate the value of a specified column. Replibyte provides pre-made [transformers](/docs/transformers). You can also [build your own Transformer in web assembly](/docs/transformers#wasm). 35 | 36 | ## Datastore 37 | 38 | A datastore is where the source dump is stored. E.g a dump can store into a S3 datastore or on a local hard drive. 39 | 40 | ## Destination 41 | 42 | A destination is a database where the dump will is restored. 
43 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: Build and Test 4 | 5 | jobs: 6 | test: 7 | strategy: 8 | matrix: 9 | os: [ubuntu-latest] 10 | rust-toolchain: [stable] 11 | fail-fast: false 12 | 13 | runs-on: ${{ matrix.os }} 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v2 18 | 19 | - name: Install Rust toolchain 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: ${{ matrix.rust-toolchain }} 23 | components: rustfmt 24 | override: true 25 | 26 | - name: Install Build Essentials 27 | run: sudo apt-get install build-essential mingw-w64 gcc 28 | 29 | - name: Verify versions 30 | run: rustc --version && rustup --version && cargo --version 31 | 32 | - name: Cache build artifacts 33 | id: cache-cargo 34 | uses: actions/cache@v2 35 | with: 36 | path: | 37 | ~/.cargo/registry 38 | ~/.cargo/git 39 | target 40 | key: ${{ runner.os }}-cargo-${{ matrix.rust-toolchain }} 41 | 42 | - name: Cache integration artifacts 43 | id: cache-integration 44 | uses: actions/cache@v2 45 | with: 46 | path: | 47 | tests/integration/runner/node_modules 48 | key: ${{ runner.os }}-integration-${{ matrix.rust-toolchain }} 49 | 50 | - name: Build RepliByte 51 | run: cargo build --release --all-features 52 | 53 | - name: Start Postgres, MySQL, MongoDB and MinIO Containers 54 | run: docker-compose -f "docker-compose-dev.yml" up -d --build 55 | 56 | - name: Test RepliByte 57 | env: 58 | AWS_REGION: ${{ secrets.AWS_REGION }} 59 | run: cargo test --all-features 60 | 61 | # - name: Bench RepliByte 62 | # run: cargo bench 63 | 64 | #- name: Check RepliByte code style 65 | # run: cargo fmt -- --check 66 | 67 | - name: Stop Postgres, MySQL, MongoDB and MinIO Containers 68 | if: always() 69 | run: docker-compose -f "docker-compose-dev.yml" down --remove-orphans 70 | 
-------------------------------------------------------------------------------- /website/docs/databases.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 5 3 | --- 4 | 5 | # Databases 6 | 7 | Replibyte supports [PostgreSQL](#postgresql), [MySQL](#mysql--mariadb) and [MongoDB](#mongodb) databases. 8 | 9 | ## PostgreSQL 10 | 11 | :::caution requirements 12 | 13 | You need `pg_dump` binary locally installed for running `replibyte backup --remote` and `replibyte restore --remote` 14 | 15 | ::: 16 | 17 | To use PostgreSQL it's as simple as using prefixed connection URI with `postgres://` 18 | 19 | ```yaml 20 | source: 21 | connection_uri: postgres://:@:/ # you can use $DATABASE_URL 22 | #... 23 | destination: 24 | connection_uri: postgres://:@:/ # you can use $DATABASE_URL 25 | ``` 26 | 27 | ## MySQL / MariaDB 28 | 29 | :::caution requirements 30 | 31 | You need `mysqldump` binary locally installed for running `replibyte backup --remote` and `replibyte restore --remote` 32 | 33 | ::: 34 | 35 | To use MySQL or MariaDB it's as simple as using prefixed connection URI with `mysql://` 36 | 37 | ```yaml 38 | source: 39 | connection_uri: mysql://:@:/ # you can use $DATABASE_URL 40 | #... 41 | destination: 42 | connection_uri: mysql://:@:/ # you can use $DATABASE_URL 43 | ``` 44 | 45 | ## MongoDB 46 | 47 | :::caution requirements 48 | 49 | You need `mongodump` binary locally installed for running `replibyte backup --remote` and `replibyte restore --remote` 50 | 51 | ::: 52 | 53 | To use MongoDB it's as simple as using prefixed connection URI with `mongodb://` 54 | 55 | ```yaml 56 | source: 57 | connection_uri: mongodb://:@:/? # you can use $DATABASE_URL 58 | #... 59 | destination: 60 | connection_uri: mongodb://:@:/? # you can use $DATABASE_URL 61 | ``` 62 | 63 | 64 | ## Add another database 65 | 66 | If you don't find your database, Replibyte is extensible and any database can be supported. 
You are free to contribute by opening an issue or/and a pull request. 67 | 68 | To contribute, please see the [contributing](/docs/contributing) page. 69 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:1.59-buster as build 2 | 3 | # create a new empty shell project 4 | RUN USER=root cargo new --bin replibyte 5 | WORKDIR /replibyte 6 | RUN USER=root cargo new --lib replibyte 7 | RUN USER=root cargo new --lib dump-parser 8 | RUN USER=root cargo new --lib subset 9 | 10 | # copy over your manifests 11 | # root 12 | COPY ./Cargo.lock ./Cargo.lock 13 | COPY ./Cargo.toml ./Cargo.toml 14 | 15 | # dump-parser 16 | COPY ./dump-parser ./dump-parser 17 | 18 | # subset 19 | COPY ./subset ./subset 20 | 21 | # replibyte 22 | COPY ./replibyte/Cargo.toml ./replibyte/Cargo.toml 23 | COPY ./replibyte/Cargo.lock ./replibyte/Cargo.lock 24 | 25 | # this build step will cache your dependencies 26 | RUN cargo build --release 27 | RUN rm src/*.rs 28 | 29 | # copy your source tree 30 | COPY ./replibyte/src ./replibyte/src 31 | COPY ./dump-parser/src ./dump-parser/src 32 | COPY ./subset/src ./subset/src 33 | 34 | # build for release 35 | RUN rm ./target/release/deps/replibyte* 36 | RUN cargo build --release 37 | 38 | # our final base 39 | FROM debian:buster-slim 40 | 41 | # used to configure Github Packages 42 | LABEL org.opencontainers.image.source https://github.com/qovery/replibyte 43 | 44 | # Install Postgres and MySQL binaries 45 | RUN apt-get clean && apt-get update && apt-get install -y \ 46 | wget \ 47 | postgresql-client \ 48 | default-mysql-client 49 | 50 | # Install MongoDB tools 51 | RUN wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-debian92-x86_64-100.5.2.deb && \ 52 | apt install ./mongodb-database-tools-*.deb && \ 53 | rm -f mongodb-database-tools-*.deb && \ 54 | rm -rf /var/lib/apt/lists/* 55 | 56 | # copy the build 
artifact from the build stage 57 | COPY --from=build /replibyte/target/release/replibyte . 58 | 59 | COPY ./docker/* / 60 | RUN chmod +x exec.sh && chmod +x replibyte 61 | 62 | ARG S3_ACCESS_KEY_ID 63 | ENV S3_ACCESS_KEY_ID $S3_ACCESS_KEY_ID 64 | 65 | ARG S3_SECRET_ACCESS_KEY 66 | ENV S3_SECRET_ACCESS_KEY $S3_SECRET_ACCESS_KEY 67 | 68 | ARG S3_REGION 69 | ENV S3_REGION $S3_REGION 70 | 71 | ARG S3_BUCKET 72 | ENV S3_BUCKET $S3_BUCKET 73 | 74 | ARG SOURCE_CONNECTION_URI 75 | ENV SOURCE_CONNECTION_URI $SOURCE_CONNECTION_URI 76 | 77 | ARG DESTINATION_CONNECTION_URI 78 | ENV DESTINATION_CONNECTION_URI $DESTINATION_CONNECTION_URI 79 | 80 | ARG ENCRYPTION_SECRET 81 | ENV ENCRYPTION_SECRET $ENCRYPTION_SECRET 82 | 83 | ENTRYPOINT ["sh", "exec.sh"] 84 | -------------------------------------------------------------------------------- /replibyte/src/types.rs: -------------------------------------------------------------------------------- 1 | pub type Bytes = Vec; 2 | pub type OriginalQuery = Query; 3 | 4 | pub type Queries = Vec; 5 | 6 | pub fn to_bytes(queries: Queries) -> Bytes { 7 | queries 8 | .into_iter() 9 | .flat_map(|query| { 10 | let mut bytes = query.0; 11 | bytes.push(b'\n'); 12 | bytes 13 | }) 14 | .collect::>() 15 | } 16 | 17 | #[derive(Debug, Clone, Eq, PartialEq, Hash)] 18 | pub struct Query(pub Vec); 19 | 20 | impl Query { 21 | pub fn data(&self) -> &Vec { 22 | &self.0 23 | } 24 | } 25 | 26 | #[derive(Clone)] 27 | pub struct InsertIntoQuery { 28 | pub table_name: String, 29 | pub columns: Vec, 30 | } 31 | 32 | #[derive(Clone)] 33 | pub enum Column { 34 | NumberValue(String, i128), 35 | FloatNumberValue(String, f64), 36 | StringValue(String, String), 37 | CharValue(String, char), 38 | BooleanValue(String, bool), 39 | None(String), 40 | } 41 | 42 | impl Column { 43 | pub fn name(&self) -> &str { 44 | match self { 45 | Column::NumberValue(name, _) => name.as_str(), 46 | Column::FloatNumberValue(name, _) => name.as_str(), 47 | Column::StringValue(name, _) => 
name.as_str(), 48 | Column::CharValue(name, _) => name.as_str(), 49 | Column::BooleanValue(name, _) => name.as_str(), 50 | Column::None(name) => name.as_str(), 51 | } 52 | } 53 | 54 | pub fn number_value(&self) -> Option<&i128> { 55 | match self { 56 | Column::NumberValue(_, value) => Some(value), 57 | _ => None, 58 | } 59 | } 60 | 61 | pub fn string_value(&self) -> Option<&str> { 62 | match self { 63 | Column::StringValue(_, value) => Some(value.as_str()), 64 | _ => None, 65 | } 66 | } 67 | 68 | pub fn float_number_value(&self) -> Option<&f64> { 69 | match self { 70 | Column::FloatNumberValue(_, value) => Some(value), 71 | _ => None, 72 | } 73 | } 74 | 75 | pub fn char_value(&self) -> Option<&char> { 76 | match self { 77 | Column::CharValue(_, value) => Some(value), 78 | _ => None, 79 | } 80 | } 81 | 82 | pub fn boolean_value(&self) -> Option<&bool> { 83 | match self { 84 | Column::BooleanValue(_, value) => Some(value), 85 | _ => None, 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /website/docs/how-replibyte-works.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # How Replibyte works 6 | 7 | RepliByte is built to seed a development database with production data. Replibyte helps you to: 8 | 9 | 1. Create a development dump from production 10 | 2. [Subset](/docs/design/how-database-subset-works) your production dump into a more reasonable size. 11 | 3. Hide sensitive data via customizable [Transformers](/docs/transformers). 12 | 4. Make your development dump easily accessible from any remote and local databases. 13 | 14 | ## How creating a Replibyte dump works 15 | 16 | Here is an example of what happens while replicating a PostgreSQL database. 17 | 18 | ```mermaid 19 | sequenceDiagram 20 | participant RepliByte 21 | participant PostgreSQL (Source) 22 | participant AWS S3 (Datastore) 23 | PostgreSQL (Source)->>RepliByte: 1. 
Dump data 24 | loop 25 | RepliByte->>RepliByte: 2. Subsetting (optional) 26 | RepliByte->>RepliByte: 3. Transform sensitive data (optional) 27 | RepliByte->>RepliByte: 4. Compress data (optional) 28 | RepliByte->>RepliByte: 5. Encrypt data (optional) 29 | end 30 | RepliByte->>AWS S3 (Datastore): 6. Upload dump data 31 | RepliByte->>AWS S3 (Datastore): 7. Write index file 32 | ``` 33 | 34 | 1. RepliByte connects to the PostgreSQL Source database and makes a full SQL dump of it. 35 | 2. RepliByte receives the SQL dump, parse it, and generates random/fake information in real-time. 36 | 3. RepliByte streams and uploads the modified SQL dumps in real-time on AWS S3. 37 | 4. RepliByte keeps track of the uploaded SQL dumps by writing it into an index file. 38 | 39 | 40 | ## How loading a Replibyte dump works 41 | 42 | Once at least a replica from the source PostgreSQL database is available in the S3 bucket, RepliByte can use and inject it into the 43 | destination PostgreSQL database. 44 | 45 | ```mermaid 46 | sequenceDiagram 47 | participant RepliByte 48 | participant PostgreSQL (Destination) 49 | participant AWS S3 (Datastore) 50 | AWS S3 (Datastore)->>RepliByte: 1. Read index file 51 | AWS S3 (Datastore)->>RepliByte: 2. Download dump SQL file 52 | loop 53 | RepliByte->>RepliByte: 3. Decrypt data (if required) 54 | RepliByte->>RepliByte: 4. Uncompress data (if required) 55 | end 56 | RepliByte->>PostgreSQL (Destination): 5. Restore dump SQL 57 | ``` 58 | 59 | 1. RepliByte connects to the S3 bucket and reads the index file to retrieve the latest SQL to download. 60 | 2. RepliByte downloads the SQL dump in a stream bytes. 61 | 3. RepliByte restores the SQL dump in the destination PostgreSQL database in real-time. 
62 | 63 | 64 | -------------------------------------------------------------------------------- /replibyte/src/transformer/random.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::Transformer; 2 | use crate::types::Column; 3 | use rand::distributions::Alphanumeric; 4 | use rand::Rng; 5 | 6 | /// This struct is dedicated to generating random elements. 7 | pub struct RandomTransformer { 8 | database_name: String, 9 | table_name: String, 10 | column_name: String, 11 | } 12 | 13 | impl RandomTransformer { 14 | pub fn new(database_name: S, table_name: S, column_name: S) -> Self 15 | where 16 | S: Into, 17 | { 18 | RandomTransformer { 19 | table_name: table_name.into(), 20 | column_name: column_name.into(), 21 | database_name: database_name.into(), 22 | } 23 | } 24 | } 25 | 26 | impl Default for RandomTransformer { 27 | fn default() -> Self { 28 | RandomTransformer { 29 | database_name: String::default(), 30 | table_name: String::default(), 31 | column_name: String::default(), 32 | } 33 | } 34 | } 35 | 36 | impl Transformer for RandomTransformer { 37 | fn id(&self) -> &str { 38 | "random" 39 | } 40 | 41 | fn description(&self) -> &str { 42 | "Randomize value but keep the same length (string only). 
[AAA]->[BBB]" 43 | } 44 | 45 | fn database_name(&self) -> &str { 46 | self.database_name.as_str() 47 | } 48 | 49 | fn table_name(&self) -> &str { 50 | self.table_name.as_str() 51 | } 52 | 53 | fn column_name(&self) -> &str { 54 | self.column_name.as_str() 55 | } 56 | 57 | fn transform(&self, column: Column) -> Column { 58 | let mut random = rand::thread_rng(); 59 | 60 | match column { 61 | Column::NumberValue(column_name, _) => { 62 | Column::NumberValue(column_name, random.gen::()) 63 | } 64 | Column::FloatNumberValue(column_name, _) => { 65 | Column::FloatNumberValue(column_name, random.gen::()) 66 | } 67 | Column::StringValue(column_name, value) => { 68 | let new_value = random 69 | .sample_iter(&Alphanumeric) 70 | .take(value.len()) 71 | .map(char::from) 72 | .collect::(); 73 | 74 | Column::StringValue(column_name, new_value) 75 | } 76 | Column::CharValue(column_name, _) => { 77 | Column::CharValue(column_name, random.gen::()) 78 | } 79 | Column::BooleanValue(column_name, value) => Column::BooleanValue(column_name, value), 80 | Column::None(column_name) => Column::None(column_name), 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /replibyte/src/transformer/phone_number.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::Transformer; 2 | use crate::types::Column; 3 | use fake::faker::phone_number::raw::PhoneNumber; 4 | use fake::locales::EN; 5 | use fake::Fake; 6 | 7 | /// This struct is dedicated to replacing a string by an email address. 
8 | pub struct PhoneNumberTransformer { 9 | database_name: String, 10 | table_name: String, 11 | column_name: String, 12 | } 13 | 14 | impl PhoneNumberTransformer { 15 | pub fn new(database_name: S, table_name: S, column_name: S) -> Self 16 | where 17 | S: Into, 18 | { 19 | PhoneNumberTransformer { 20 | database_name: database_name.into(), 21 | table_name: table_name.into(), 22 | column_name: column_name.into(), 23 | } 24 | } 25 | } 26 | 27 | impl Default for PhoneNumberTransformer { 28 | fn default() -> Self { 29 | PhoneNumberTransformer { 30 | database_name: String::default(), 31 | table_name: String::default(), 32 | column_name: String::default(), 33 | } 34 | } 35 | } 36 | 37 | impl Transformer for PhoneNumberTransformer { 38 | fn id(&self) -> &str { 39 | "phone-number" 40 | } 41 | 42 | fn description(&self) -> &str { 43 | "Generate a phone number (string only)." 44 | } 45 | 46 | fn database_name(&self) -> &str { 47 | self.database_name.as_str() 48 | } 49 | 50 | fn table_name(&self) -> &str { 51 | self.table_name.as_str() 52 | } 53 | 54 | fn column_name(&self) -> &str { 55 | self.column_name.as_str() 56 | } 57 | 58 | fn transform(&self, column: Column) -> Column { 59 | match column { 60 | Column::StringValue(column_name, _) => { 61 | Column::StringValue(column_name, PhoneNumber(EN).fake()) 62 | } 63 | column => column, 64 | } 65 | } 66 | } 67 | 68 | #[cfg(test)] 69 | mod tests { 70 | use crate::{transformer::Transformer, types::Column}; 71 | 72 | use super::PhoneNumberTransformer; 73 | 74 | #[test] 75 | fn transform_string_with_a_phone_number() { 76 | let transformer = get_transformer(); 77 | let column = Column::StringValue("phone_number".to_string(), "+123456789".to_string()); 78 | let transformed_column = transformer.transform(column); 79 | let transformed_value = transformed_column.string_value().unwrap(); 80 | 81 | assert!(!transformed_value.is_empty()); 82 | assert_ne!(transformed_value, "+123456789".to_string()); 83 | } 84 | 85 | fn get_transformer() -> 
PhoneNumberTransformer { 86 | PhoneNumberTransformer::new("github", "users", "phone_number") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /replibyte/src/transformer/credit_card.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::Transformer; 2 | use crate::types::Column; 3 | use fake::faker::creditcard::raw::CreditCardNumber; 4 | use fake::locales::EN; 5 | use fake::Fake; 6 | 7 | /// This struct is dedicated to replacing a credit card string. 8 | pub struct CreditCardTransformer { 9 | database_name: String, 10 | table_name: String, 11 | column_name: String, 12 | } 13 | 14 | impl CreditCardTransformer { 15 | pub fn new(database_name: S, table_name: S, column_name: S) -> Self 16 | where 17 | S: Into, 18 | { 19 | CreditCardTransformer { 20 | database_name: database_name.into(), 21 | table_name: table_name.into(), 22 | column_name: column_name.into(), 23 | } 24 | } 25 | } 26 | 27 | impl Default for CreditCardTransformer { 28 | fn default() -> Self { 29 | CreditCardTransformer { 30 | database_name: String::default(), 31 | table_name: String::default(), 32 | column_name: String::default(), 33 | } 34 | } 35 | } 36 | 37 | impl Transformer for CreditCardTransformer { 38 | fn id(&self) -> &str { 39 | "credit-card" 40 | } 41 | 42 | fn description(&self) -> &str { 43 | "Generate a credit card number (string only)." 
44 | } 45 | 46 | fn database_name(&self) -> &str { 47 | self.database_name.as_str() 48 | } 49 | 50 | fn table_name(&self) -> &str { 51 | self.table_name.as_str() 52 | } 53 | 54 | fn column_name(&self) -> &str { 55 | self.column_name.as_str() 56 | } 57 | 58 | fn transform(&self, column: Column) -> Column { 59 | match column { 60 | Column::StringValue(column_name, _value) => { 61 | Column::StringValue(column_name, CreditCardNumber(EN).fake()) 62 | } 63 | column => column, 64 | } 65 | } 66 | } 67 | 68 | #[cfg(test)] 69 | mod tests { 70 | use crate::{transformer::Transformer, types::Column}; 71 | 72 | use super::CreditCardTransformer; 73 | 74 | #[test] 75 | fn transform_string_with_a_credit_card() { 76 | let transformer = get_transformer(); 77 | let column = Column::StringValue("credit_card".to_string(), "4242424242424242".to_string()); 78 | let transformed_column = transformer.transform(column); 79 | let transformed_value = transformed_column.string_value().unwrap(); 80 | 81 | assert!(!transformed_value.is_empty()); 82 | assert_ne!(transformed_value, "4242424242424242".to_string()); 83 | } 84 | 85 | fn get_transformer() -> CreditCardTransformer { 86 | CreditCardTransformer::new("github", "users", "credit_card") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /replibyte/src/migration/update_version_number.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Error, ErrorKind}, 3 | str::FromStr, 4 | }; 5 | 6 | use log::info; 7 | use serde_json::{json, Value}; 8 | 9 | use crate::datastore::Datastore; 10 | 11 | use super::{Migration, Version}; 12 | 13 | pub struct UpdateVersionNumber<'a> { 14 | version: &'a str, 15 | } 16 | 17 | impl<'a> UpdateVersionNumber<'a> { 18 | pub fn new(version: &'a str) -> Self { 19 | Self { version } 20 | } 21 | } 22 | 23 | impl<'a> Migration for UpdateVersionNumber<'a> { 24 | fn minimal_version(&self) -> Version { 25 | 
Version::from_str("0.7.3").unwrap() 26 | } 27 | 28 | fn run(&self, datastore: &Box) -> Result<(), Error> { 29 | info!("migrate: update version number"); 30 | 31 | let mut raw_index_file = datastore.raw_index_file()?; 32 | let _ = update_version(&mut raw_index_file, self.version)?; 33 | datastore.write_raw_index_file(&raw_index_file) 34 | } 35 | } 36 | 37 | fn update_version(metadata_json: &mut Value, version: &str) -> Result<(), Error> { 38 | match metadata_json.as_object_mut() { 39 | Some(metadata) => { 40 | metadata.insert("v".to_string(), json!(version)); 41 | Ok(()) 42 | } 43 | None => Err(Error::new( 44 | ErrorKind::Other, 45 | "migrate: metadata.json is not an object", 46 | )), 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod tests { 52 | use serde_json::json; 53 | 54 | use crate::migration::update_version_number::update_version; 55 | 56 | #[test] 57 | fn test_update_version() { 58 | let mut metadata_json = json!({"backups": []}); 59 | 60 | assert!(update_version(&mut metadata_json, "0.1.0").is_ok()); 61 | assert!(metadata_json.get("v").is_some()); 62 | assert_eq!(metadata_json.get("v").unwrap(), "0.1.0"); 63 | 64 | let mut metadata_json = json!({ 65 | "backups": [ 66 | { 67 | "directory_name":"dump-1653170039392", 68 | "size":62279, 69 | "created_at":1234, 70 | "compressed":true, 71 | "encrypted":false 72 | } 73 | ] 74 | }); 75 | assert!(update_version(&mut metadata_json, "0.2.0").is_ok()); 76 | assert!(metadata_json.get("v").is_some()); 77 | assert_eq!(metadata_json.get("v").unwrap(), "0.2.0"); 78 | 79 | let mut metadata_json = json!({"v": "0.7.3", "backups": []}); 80 | assert!(update_version(&mut metadata_json, "0.7.4").is_ok()); 81 | assert!(metadata_json.get("v").is_some()); 82 | assert_eq!(metadata_json.get("v").unwrap(), "0.7.4"); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### JetBrains template 2 | # 
Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea/**/workspace.xml 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # Generated files 13 | .idea/**/contentModel.xml 14 | 15 | # Sensitive or high-churn files 16 | .idea/**/dataSources/ 17 | .idea/**/dataSources.ids 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | .idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | .idea/**/dbnavigator.xml 23 | 24 | # Gradle 25 | .idea/**/gradle.xml 26 | .idea/**/libraries 27 | 28 | # Gradle and Maven with auto-import 29 | # When using Gradle or Maven with auto-import, you should exclude module files, 30 | # since they will be recreated, and may cause churn. Uncomment if using 31 | # auto-import. 32 | # .idea/artifacts 33 | # .idea/compiler.xml 34 | # .idea/jarRepositories.xml 35 | # .idea/modules.xml 36 | # .idea/*.iml 37 | # .idea/modules 38 | # *.iml 39 | # *.ipr 40 | 41 | # CMake 42 | cmake-build-*/ 43 | 44 | # Mongo Explorer plugin 45 | .idea/**/mongoSettings.xml 46 | 47 | # File-based project format 48 | *.iws 49 | 50 | # IntelliJ 51 | out/ 52 | 53 | # mpeltonen/sbt-idea plugin 54 | .idea_modules/ 55 | 56 | # JIRA plugin 57 | atlassian-ide-plugin.xml 58 | 59 | # Cursive Clojure plugin 60 | .idea/replstate.xml 61 | 62 | # Crashlytics plugin (for Android Studio and IntelliJ) 63 | com_crashlytics_export_strings.xml 64 | crashlytics.properties 65 | crashlytics-build.properties 66 | fabric.properties 67 | 68 | # Editor-based Rest Client 69 | .idea/httpRequests 70 | 71 | # Android studio 3.1+ serialized cache file 72 | .idea/caches/build_file_checksums.ser 73 | 74 | ### Go template 75 | # Binaries for programs and plugins 76 | *.exe 77 | *.exe~ 78 | *.dll 79 | *.so 80 | *.dylib 81 | 82 | # Test binary, built 
with `go test -c` 83 | *.test 84 | 85 | # Output of the go coverage tool, specifically when used with LiteIDE 86 | *.out 87 | 88 | # Dependency directories (remove the comment below to include it) 89 | # vendor/ 90 | 91 | /target/ 92 | 93 | # Configuration files 94 | prod-conf.yaml 95 | .prod-conf.yaml 96 | 97 | 98 | # Dependencies 99 | /node_modules 100 | 101 | # Production 102 | /build 103 | 104 | # Generated files 105 | .docusaurus 106 | .cache-loader 107 | 108 | # Misc 109 | .DS_Store 110 | .env.local 111 | .env.development.local 112 | .env.test.local 113 | .env.production.local 114 | 115 | npm-debug.log* 116 | yarn-debug.log* 117 | yarn-error.log* 118 | 119 | /my-datastore 120 | 121 | *.release -------------------------------------------------------------------------------- /website/docs/getting-started/installation.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # Installation 6 | 7 |
8 | 9 | Install on MacOSX 10 | 11 | ```shell 12 | brew tap Qovery/replibyte 13 | brew install replibyte 14 | ``` 15 | 16 | Or [manually](https://github.com/Qovery/replibyte/releases). 17 | 18 |
19 | 20 |
21 | 22 | Install on Linux 23 | 24 | ```shell 25 | # download latest replibyte archive for Linux 26 | curl -s https://api.github.com/repos/Qovery/replibyte/releases/latest | \ 27 | jq -r '.assets[].browser_download_url' | \ 28 | grep -i 'linux-musl.tar.gz$' | wget -qi - && \ 29 | 30 | # unarchive 31 | tar zxf *.tar.gz 32 | 33 | # make replibyte executable 34 | chmod +x replibyte 35 | 36 | # make it accessible from everywhere 37 | mv replibyte /usr/local/bin/ 38 | ``` 39 | 40 |
41 | 42 |
43 | 44 | Install on Windows 45 | 46 | Download [the latest Windows release](https://github.com/Qovery/replibyte/releases) and install it. 47 | 48 |
49 | 50 |
51 | 52 | Install from source 53 | 54 | ```shell 55 | git clone https://github.com/Qovery/replibyte.git && cd replibyte 56 | 57 | # Install cargo 58 | # visit: https://doc.rust-lang.org/cargo/getting-started/installation.html 59 | 60 | # Build with cargo 61 | cargo build --release 62 | 63 | # Run RepliByte 64 | ./target/release/replibyte -h 65 | ``` 66 | 67 |
68 | 69 |
70 | 71 | Run replibyte with Docker 72 | 73 | This example assumes you have a configuration file named replibyte.yaml in the directory from which you're running the docker command. 74 | 75 | ```shell 76 | docker run -it --rm --name replibyte \ 77 | -v "$(pwd)/replibyte.yaml:/replibyte.yaml:ro" \ 78 | ghcr.io/qovery/replibyte --config replibyte.yaml 79 | ``` 80 | 81 | If you're using the `local_disk` datastore, you must mount a volume by adding `-v "$(pwd)/my-datastore:/datastore"`. 82 | This assumes that the "datastore" part of your config file is as follows: 83 | 84 | ```yaml 85 | datastore: 86 | local_disk: 87 | dir: ./my-datastore 88 | ``` 89 | 90 |
91 | 92 | ## Telemetry 93 | 94 | RepliByte collects anonymized data from users in order to improve our product. Feel free to inspect the 95 | code [here](https://github.com/Qovery/replibyte/blob/main/replibyte/src/telemetry.rs). This can be deactivated at any time, and any data that has already been collected can be deleted on 96 | request (hello+replibyte {at} qovery {dot} com). 97 | 98 | ### Collected data 99 | 100 | - Command line parameters 101 | - Options used (subset, transformer, compression) in the configuration file. 102 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | TOML_FILES="\ 6 | replibyte/Cargo.toml \ 7 | subset/Cargo.toml \ 8 | dump-parser/Cargo.toml 9 | " 10 | 11 | old=$1 12 | new=$2 13 | 14 | if [ -z "${old}" ] || [ -z "${new}" ] 15 | then 16 | echo "please run: $0 " 17 | exit 1 18 | fi 19 | 20 | if [ "$(git status --porcelain=v1 2>/dev/null | wc -l)" -ne 0 ] 21 | then 22 | git status 23 | echo "There are unsaved changes in the repository, press CTRL-C to abort now or return to continue." 24 | read -r answer 25 | fi 26 | 27 | echo -n "Release process starting from '${old}' -> '${new}', do you want to continue? [y/N] " 28 | read -r answer 29 | 30 | 31 | case "${answer}" in 32 | Y*|y*) 33 | ;; 34 | *) 35 | echo "Aborting" 36 | exit 0 37 | ;; 38 | esac; 39 | 40 | echo "==> ${answer}" 41 | 42 | echo -n "Updating TOML files:" 43 | for toml in ${TOML_FILES} 44 | do 45 | echo -n " ${toml}" 46 | sed -e "s/^version = \"${old}\"$/version = \"${new}\"/" -i.release "${toml}" 47 | done 48 | echo "." 49 | 50 | echo "Please review the following changes. (return to continue)" 51 | read -r answer 52 | 53 | git diff 54 | 55 | echo "Do you want to Continue or Rollback? 
[c/R]" 56 | read -r answer 57 | 58 | case "${answer}" in 59 | C*|c*) 60 | git checkout -b "release-v${new}" 61 | git commit -sa -m "Release v${new}" 62 | git push --set-upstream origin "release-v${new}" 63 | ;; 64 | *) 65 | git checkout . 66 | exit 67 | ;; 68 | esac; 69 | 70 | echo "Please open the following pull request we'll wait here continue when it is merged." 71 | echo 72 | echo " >> https://github.com/qovery/replibyte/pull/new/release-v${new} <<" 73 | echo 74 | echo "Once you continue we'll generate and push the release tag with the latest 'main'" 75 | echo 76 | echo "WARNING: Review and wait until the pull request is merged before continuing to create the release" 77 | read -r answer 78 | 79 | echo "Generating release tag v${new}" 80 | 81 | git checkout main 82 | git pull 83 | 84 | # The version is correctly updated in the replibyte crate cargo.toml (aka the PR is merged) 85 | if grep -q "version = \"${new}\"" ${TOML_FILES[0]}; then 86 | git tag -a -m"Release v${new}" "v${new}" 87 | git push --tags 88 | 89 | echo "Congrats release v${new} is done!" 90 | else 91 | echo 92 | echo "It seems the version is not updated, are you sure you have merged the pull request as stated before?" 93 | echo "If that's not the case, you're invited to run again the release script and wait for the PR is merged before continuing." 
94 | echo 95 | echo "Rollback changes" 96 | 97 | git branch -d "release-v${new}" 98 | git push origin --delete "release-v${new}" 99 | fi 100 | -------------------------------------------------------------------------------- /replibyte/src/utils.rs: -------------------------------------------------------------------------------- 1 | use prettytable::{format, Table}; 2 | use std::io::{Error, ErrorKind, Read}; 3 | use std::process::Child; 4 | use std::time::{SystemTime, UNIX_EPOCH}; 5 | use which::which; 6 | 7 | pub fn epoch_millis() -> u128 { 8 | SystemTime::now() 9 | .duration_since(UNIX_EPOCH) 10 | .unwrap() 11 | .as_millis() 12 | } 13 | 14 | pub fn table() -> Table { 15 | // Create the table 16 | let mut table = Table::new(); 17 | 18 | table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR); 19 | 20 | table 21 | } 22 | 23 | /// converts Bytes into Bytes, KB, MB, GB or TB 24 | pub fn to_human_readable_unit(bytes: usize) -> String { 25 | match bytes { 26 | 0..=1023 => format!("{} Bytes", bytes), 27 | 1024..=1023_999 => format!("{:.2} kB", bytes / 1000), 28 | 1024_000..=1023_999_999 => format!("{:.2} MB", bytes / 1_000_000), 29 | 1024_000_000..=1023_999_999_999 => format!("{:.2} MB", bytes / 1_000_000_000), 30 | 1024_000_000_000..=1023_999_999_999_999 => format!("{:.2} GB", bytes / 1_000_000_000_000), 31 | _ => format!("{:.2} TB", bytes / 1_000_000_000_000_000), 32 | } 33 | } 34 | 35 | /// check for binary presence in PATH 36 | pub fn binary_exists(binary_name: &str) -> Result<(), Error> { 37 | let _ = which(binary_name).map_err(|_| { 38 | Error::new( 39 | ErrorKind::Other, 40 | format!("cannot find '{}' binary in path", binary_name), 41 | ) 42 | })?; 43 | 44 | Ok(()) 45 | } 46 | 47 | // wait for the end of a process and handle errors 48 | pub fn wait_for_command(process: &mut Child) -> Result<(), Error> { 49 | match process.wait() { 50 | Ok(exit_status) => { 51 | if !exit_status.success() { 52 | if let Some(stderr) = process.stderr.take().as_mut() { 53 | 
let mut buffer = String::new(); 54 | let error = match stderr.read_to_string(&mut buffer) { 55 | Ok(_) => Error::new(ErrorKind::Other, format!("{}", buffer)), 56 | Err(err) => Error::new(ErrorKind::Other, format!("{}", err)), 57 | }; 58 | 59 | return Err(Error::new( 60 | ErrorKind::Other, 61 | format!("command error: {}", error), 62 | )); 63 | } 64 | 65 | return Err(Error::new( 66 | ErrorKind::Other, 67 | format!("command error: {}", exit_status.to_string()), 68 | )); 69 | } 70 | 71 | Ok(()) 72 | } 73 | Err(err) => Err(Error::new( 74 | ErrorKind::Other, 75 | format!("command error: {}", err), 76 | )), 77 | } 78 | } 79 | 80 | pub fn get_replibyte_version() -> &'static str { 81 | env!("CARGO_PKG_VERSION") 82 | } 83 | -------------------------------------------------------------------------------- /replibyte/src/tasks/full_restore.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | use std::sync::mpsc; 3 | use std::thread; 4 | 5 | use crate::datastore::{Datastore, ReadOptions}; 6 | use crate::destination::Destination; 7 | use crate::tasks::{MaxBytes, Message, Task, TransferredBytes}; 8 | use crate::types::Bytes; 9 | 10 | /// FullRestoreTask is a wrapping struct to execute the synchronization between a *Datastore* and a *Source*. 
11 | pub struct FullRestoreTask<'a, D> 12 | where 13 | D: Destination, 14 | { 15 | destination: &'a mut D, 16 | datastore: Box, 17 | read_options: ReadOptions, 18 | } 19 | 20 | impl<'a, D> FullRestoreTask<'a, D> 21 | where 22 | D: Destination, 23 | { 24 | pub fn new( 25 | destination: &'a mut D, 26 | datastore: Box, 27 | read_options: ReadOptions, 28 | ) -> Self { 29 | FullRestoreTask { 30 | destination, 31 | datastore, 32 | read_options, 33 | } 34 | } 35 | } 36 | 37 | impl<'a, D> Task for FullRestoreTask<'a, D> 38 | where 39 | D: Destination, 40 | { 41 | fn run( 42 | mut self, 43 | mut progress_callback: F, 44 | ) -> Result<(), Error> { 45 | // initialize the destination 46 | let _ = self.destination.init()?; 47 | 48 | // bound to 1 to avoid eating too much memory if we download the dump faster than we ingest it 49 | let (tx, rx) = mpsc::sync_channel::>(1); 50 | let datastore = self.datastore; 51 | 52 | let mut index_file = datastore.index_file()?; 53 | let dump = index_file.find_dump(&self.read_options)?; 54 | 55 | // init progress 56 | progress_callback(0, dump.size); 57 | 58 | let read_options = self.read_options.clone(); 59 | 60 | let join_handle = thread::spawn(move || { 61 | // managing Datastore (S3) download here 62 | let datastore = datastore; 63 | let read_options = read_options; 64 | 65 | let _ = match datastore.read(&read_options, &mut |data| { 66 | let _ = tx.send(Message::Data(data)); 67 | }) { 68 | Ok(_) => {} 69 | Err(err) => panic!("{:?}", err), 70 | }; 71 | 72 | let _ = tx.send(Message::EOF); 73 | }); 74 | 75 | loop { 76 | let data = match rx.recv() { 77 | Ok(Message::Data(data)) => data, 78 | Ok(Message::EOF) => break, 79 | Err(err) => panic!("{:?}", err), // FIXME what should I do here? 
80 | }; 81 | 82 | progress_callback(data.len(), dump.size); 83 | 84 | let _ = self.destination.write(data)?; 85 | } 86 | 87 | // wait for end of download execution 88 | let _ = join_handle.join(); // FIXME catch result here 89 | 90 | progress_callback(dump.size, dump.size); 91 | 92 | Ok(()) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /replibyte/src/transformer/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::credit_card::CreditCardTransformer; 2 | use crate::transformer::custom_wasm::CustomWasmTransformer; 3 | use crate::transformer::email::EmailTransformer; 4 | use crate::transformer::first_name::FirstNameTransformer; 5 | use crate::transformer::keep_first_char::KeepFirstCharTransformer; 6 | use crate::transformer::phone_number::PhoneNumberTransformer; 7 | use crate::transformer::random::RandomTransformer; 8 | use crate::transformer::redacted::RedactedTransformer; 9 | use crate::transformer::transient::TransientTransformer; 10 | use crate::types::Column; 11 | 12 | pub mod credit_card; 13 | pub mod email; 14 | pub mod first_name; 15 | pub mod keep_first_char; 16 | pub mod phone_number; 17 | pub mod random; 18 | pub mod redacted; 19 | pub mod transient; 20 | 21 | // FIXME: CI release build is broken because of feature flag 22 | //#[cfg(feature = "wasm")] 23 | pub mod custom_wasm; 24 | 25 | pub fn transformers() -> Vec> { 26 | vec![ 27 | Box::new(EmailTransformer::default()), 28 | Box::new(FirstNameTransformer::default()), 29 | Box::new(PhoneNumberTransformer::default()), 30 | Box::new(RandomTransformer::default()), 31 | Box::new(KeepFirstCharTransformer::default()), 32 | Box::new(TransientTransformer::default()), 33 | Box::new(CreditCardTransformer::default()), 34 | Box::new(RedactedTransformer::default()), 35 | Box::new(CustomWasmTransformer::default()), 36 | ] 37 | } 38 | 39 | /// Trait to implement to create a custom Transformer. 
40 | pub trait Transformer { 41 | fn id(&self) -> &str; 42 | fn description(&self) -> &str; 43 | fn database_name(&self) -> &str; 44 | fn table_name(&self) -> &str; 45 | fn column_name(&self) -> &str; 46 | fn quoted_table_name(&self) -> String { 47 | let table_name = self.table_name(); 48 | 49 | if table_name.to_lowercase() != table_name { 50 | return format!("\"{}\"", table_name); 51 | } 52 | 53 | String::from(table_name) 54 | } 55 | 56 | fn database_and_table_name(&self) -> String { 57 | format!("{}.{}", self.database_name(), self.table_name()) 58 | } 59 | 60 | fn database_and_table_and_column_name(&self) -> String { 61 | format!( 62 | "{}.{}.{}", 63 | self.database_name(), 64 | self.table_name(), 65 | self.column_name() 66 | ) 67 | } 68 | 69 | fn database_and_quoted_table_and_column_name(&self) -> String { 70 | format!( 71 | "{}.{}.{}", 72 | self.database_name(), 73 | self.quoted_table_name(), 74 | self.column_name() 75 | ) 76 | } 77 | 78 | fn table_and_column_name(&self) -> String { 79 | format!( 80 | "{}.{}", 81 | self.table_name(), 82 | self.column_name() 83 | ) 84 | } 85 | 86 | fn transform(&self, column: Column) -> Column; 87 | } 88 | -------------------------------------------------------------------------------- /replibyte/src/migration/rename_backups_to_dumps.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Error, ErrorKind}, 3 | str::FromStr, 4 | }; 5 | 6 | use log::info; 7 | use serde_json::{json, Value}; 8 | 9 | use crate::datastore::Datastore; 10 | 11 | use super::{Migration, Version}; 12 | 13 | pub struct RenameBackupsToDump {} 14 | 15 | impl RenameBackupsToDump { 16 | pub fn default() -> Self { 17 | Self {} 18 | } 19 | } 20 | 21 | impl Migration for RenameBackupsToDump { 22 | fn minimal_version(&self) -> Version { 23 | Version::from_str("0.7.3").unwrap() 24 | } 25 | 26 | fn run(&self, datastore: &Box) -> Result<(), Error> { 27 | info!("migrate: rename backups to dumps"); 28 | 29 | let mut 
raw_index_file = datastore.raw_index_file()?; 30 | let _ = rename_backups_to_dumps(&mut raw_index_file)?; 31 | datastore.write_raw_index_file(&raw_index_file) 32 | } 33 | } 34 | 35 | fn rename_backups_to_dumps(metadata_json: &mut Value) -> Result<(), Error> { 36 | match metadata_json.as_object_mut() { 37 | Some(metadata) => { 38 | // we rename the `backups` key to `dumps` 39 | if metadata.contains_key("backups") { 40 | let backups = metadata.get("backups").unwrap_or(&json!([])).clone(); 41 | metadata.insert("dumps".to_string(), backups); 42 | metadata.remove("backups"); 43 | } 44 | Ok(()) 45 | } 46 | None => Err(Error::new( 47 | ErrorKind::Other, 48 | "migrate: metadata.json is not an object", 49 | )), 50 | } 51 | } 52 | 53 | #[cfg(test)] 54 | mod tests { 55 | use serde_json::json; 56 | 57 | use crate::migration::rename_backups_to_dumps::rename_backups_to_dumps; 58 | 59 | #[test] 60 | fn test_rename_backup_to_dumps() { 61 | let mut metadata_json = json!({"backups": []}); 62 | assert!(rename_backups_to_dumps(&mut metadata_json).is_ok()); 63 | assert!(metadata_json.get("backups").is_none()); 64 | assert!(metadata_json.get("dumps").is_some()); 65 | assert!(metadata_json.get("dumps").unwrap().is_array()); 66 | 67 | let mut metadata_json = json!({ 68 | "backups": [ 69 | { 70 | "directory_name":"dump-1653170039392", 71 | "size":62279, 72 | "created_at":1234, 73 | "compressed":true, 74 | "encrypted":false 75 | } 76 | ] 77 | }); 78 | assert!(rename_backups_to_dumps(&mut metadata_json).is_ok()); 79 | assert!(metadata_json.get("backups").is_none()); 80 | assert!(metadata_json.get("dumps").is_some()); 81 | assert!(metadata_json.get("dumps").unwrap().is_array()); 82 | assert!(metadata_json 83 | .get("dumps") 84 | .unwrap() 85 | .as_array() 86 | .unwrap() 87 | .contains(&json!({ 88 | "directory_name":"dump-1653170039392", 89 | "size":62279, 90 | "created_at":1234, 91 | "compressed":true, 92 | "encrypted":false 93 | }))); 94 | } 95 | } 96 | 
-------------------------------------------------------------------------------- /replibyte/src/destination/mysql.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, Write}; 2 | use std::process::{Command, Stdio}; 3 | 4 | use crate::connector::Connector; 5 | use crate::destination::Destination; 6 | use crate::types::Bytes; 7 | use crate::utils::{binary_exists, wait_for_command}; 8 | 9 | pub struct Mysql<'a> { 10 | host: &'a str, 11 | port: u16, 12 | database: &'a str, 13 | username: &'a str, 14 | password: &'a str, 15 | } 16 | 17 | impl<'a> Mysql<'a> { 18 | pub fn new( 19 | host: &'a str, 20 | port: u16, 21 | database: &'a str, 22 | username: &'a str, 23 | password: &'a str, 24 | ) -> Self { 25 | Mysql { 26 | host, 27 | port, 28 | database, 29 | username, 30 | password, 31 | } 32 | } 33 | } 34 | 35 | impl<'a> Connector for Mysql<'a> { 36 | fn init(&mut self) -> Result<(), Error> { 37 | let _ = binary_exists("mysql")?; 38 | 39 | // test MySQL connection 40 | let mut process = Command::new("mysql") 41 | .args([ 42 | "-h", 43 | self.host, 44 | "-P", 45 | self.port.to_string().as_str(), 46 | "-u", 47 | self.username, 48 | &format!("-p{}", self.password), 49 | "-e", 50 | "SELECT 1;", 51 | ]) 52 | .stdout(Stdio::piped()) 53 | .spawn()?; 54 | 55 | wait_for_command(&mut process) 56 | } 57 | } 58 | 59 | impl<'a> Destination for Mysql<'a> { 60 | fn write(&self, data: Bytes) -> Result<(), Error> { 61 | let mut process = Command::new("mysql") 62 | .args([ 63 | "-h", 64 | self.host, 65 | "-P", 66 | self.port.to_string().as_str(), 67 | "-u", 68 | self.username, 69 | &format!("-p{}", self.password), 70 | self.database, 71 | ]) 72 | .stdin(Stdio::piped()) 73 | .stdout(Stdio::null()) 74 | .spawn()?; 75 | 76 | let _ = process.stdin.take().unwrap().write_all(data.as_slice()); 77 | 78 | wait_for_command(&mut process) 79 | } 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use crate::connector::Connector; 85 | use 
crate::destination::mysql::Mysql; 86 | use crate::destination::Destination; 87 | 88 | fn get_mysql() -> Mysql<'static> { 89 | Mysql::new("127.0.0.1", 3306, "mysql", "root", "password") 90 | } 91 | 92 | fn get_invalid_mysql() -> Mysql<'static> { 93 | Mysql::new("127.0.0.1", 3306, "mysql", "root", "wrong_password") 94 | } 95 | 96 | #[test] 97 | fn connect() { 98 | let mut m = get_mysql(); 99 | let _ = m.init().expect("can't init mysql"); 100 | assert!(m.write(b"SELECT 1;".to_vec()).is_ok()); 101 | 102 | let mut m = get_invalid_mysql(); 103 | assert!(m.init().is_err()); 104 | assert!(m.write(b"SELECT 1".to_vec()).is_err()); 105 | } 106 | 107 | #[test] 108 | fn test_inserts() {} 109 | } 110 | -------------------------------------------------------------------------------- /docs/DESIGN.md: -------------------------------------------------------------------------------- 1 | ## How RepliByte works 2 | 3 | RepliByte is built to replicate small and very large databases from one place (source) to the other (destination) with a bridge as 4 | intermediary (bridge). Here is an example of what happens while replicating a PostgreSQL database. 5 | 6 | ```mermaid 7 | sequenceDiagram 8 | participant RepliByte 9 | participant PostgreSQL (Source) 10 | participant AWS S3 (Bridge) 11 | PostgreSQL (Source)->>RepliByte: 1. Dump data 12 | loop 13 | RepliByte->>RepliByte: 2. Subsetting (optional) 14 | RepliByte->>RepliByte: 3. Hide or fake sensitive data (optional) 15 | RepliByte->>RepliByte: 4. Compress data (optional) 16 | RepliByte->>RepliByte: 5. Encrypt data (optional) 17 | end 18 | RepliByte->>AWS S3 (Bridge): 6. Upload obfuscated dump data 19 | RepliByte->>AWS S3 (Bridge): 7. Write index file 20 | ``` 21 | 22 | 1. RepliByte connects to the _PostgreSQL Source_ database and makes a full SQL dump of it. 23 | 2. RepliByte receives the SQL dump, parse it, and generates random/fake information in real-time. 24 | 3. RepliByte streams and uploads the modified SQL dump in real-time on AWS S3. 
25 | 4. RepliByte keeps track of the uploaded SQL dump by writing it into an index file. 26 | 27 | --- 28 | 29 | Once at least a replica from the source PostgreSQL database is available in the S3 bucket, RepliByte can use and inject it into the 30 | destination PostgreSQL database. 31 | 32 | ```mermaid 33 | sequenceDiagram 34 | participant RepliByte 35 | participant PostgreSQL (Destination) 36 | participant AWS S3 (Bridge) 37 | AWS S3 (Bridge)->>RepliByte: 1. Read index file 38 | AWS S3 (Bridge)->>RepliByte: 2. Download dump SQL file 39 | loop 40 | RepliByte->>RepliByte: 3. Decrypt data (if required) 41 | RepliByte->>RepliByte: 4. Uncompress data (if required) 42 | end 43 | RepliByte->>PostgreSQL (Destination): 5. Restore dump SQL 44 | ``` 45 | 46 | 1. RepliByte connects to the S3 bucket and reads the index file to retrieve the latest SQL to download. 47 | 2. RepliByte downloads the SQL dump in a stream of bytes. 48 | 3. RepliByte restores the SQL dump in the destination PostgreSQL database in real-time. 49 | 50 | ## Design 51 | 52 | ### Low Memory and CPU footprint 53 | 54 | Written in Rust, RepliByte can run with 512 MB of RAM and 1 CPU to replicate 1 TB of data (we are working on a benchmark). RepliByte 55 | replicates the data in a stream of bytes and does not store anything on a local disk. 56 | 57 | ### Limitations 58 | 59 | - Tested with PostgreSQL 13 and 14. It should work with prior versions. 60 | - RepliByte has not been designed to run multiple dumps targeting the same Bridge. The Index File does not manage concurrent writes (ATM). 61 | 62 | ### Index file structure 63 | 64 | An index file describes the structure of all your dumps. 65 | 66 | Here is the manifest file that you can find at the root of your target `Bridge` (e.g. S3).
67 | 68 | ```json 69 | { 70 | "dumps": [ 71 | { 72 | "size": 1024000, 73 | "directory_name": "dump-{epoch timestamp}", 74 | "created_at": "epoch timestamp", 75 | "compressed": true, 76 | "encrypted": true 77 | } 78 | ] 79 | } 80 | ``` 81 | 82 | - _size_ is in bytes 83 | - _created_at_ is an epoch timestamp in millis 84 | -------------------------------------------------------------------------------- /website/docs/advanced-guides/web-assembly-transformer.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 3 3 | --- 4 | 5 | # Web Assembly transformer 6 | 7 | - This folder contains an example of a wasm (WebAssembly) transformer which reads the column value (in this case, a string) input from stdin, transforms it (in this case, reverses it), and then writes the result to stdout. 8 | - The file `wasm-transformer-reverse-string.wasm` was originally written in rust, and compiled to a `wasm32-wasi` target binary. 9 | - A great way to implement your own custom wasm transformer would be to write it in a [language which supports WebAssembly in a WASI environment](https://www.fermyon.com/wasm-languages/webassembly-language-support) and then compile it to a `.wasm` file. 10 | - In the following section, we will demonstrate how to implement a custom wasm transformer by using Rust (to understand how to do this with other languages, we suggest reading more about [`wasm`](https://developer.mozilla.org/en-US/docs/WebAssembly) and [`WASI`](https://wasi.dev/)). 11 | 12 | ## How it works 13 | 14 | RepliByte's communication with external `wasm` modules is implemented with the use of pipes: 15 | 1. The column value which needs to be transformed is written to stdin by RepliByte. This will always be a single column value. 16 | 2. The wasm module should read the value from stdin and **transform** it (this is where your custom implementation comes in). 17 | 3. The wasm module should write the transformed value to stdout. 18 | 4. 
RepliByte reads the transformed value from stdout. RepliByte will expect to read a single column value, anything else will cause a runtime error. 19 | 20 | As long as you start with reading from stdin and end with printing to stdout, you can go as crazy as you want with the implementation of your custom transformers. 21 | 22 | ## Implementing a custom transformer with Rust 23 | 24 | First, start a new cargo project: 25 | 26 | ```shell 27 | cargo init my-custom-wasm-transformer 28 | ``` 29 | 30 | Go to `src/main.rs` in the newly created project and write some code: 31 | 32 | ```rust 33 | // This is actually the source of the `.wasm` file in this example. Feel free to edit it ! 34 | fn main() { 35 | // Read input value from stdin 36 | let mut input = String::new(); 37 | std::io::stdin().read_line(&mut input).unwrap(); 38 | 39 | // Transform the value as you see fit (in this case we just reverse the string) 40 | let output: String = input.chars().rev().collect(); 41 | 42 | // Write transformed value to stdout (simply print) 43 | println!("{}", output); 44 | } 45 | ``` 46 | 47 | Add `wasm32-wasi` to your targets: 48 | 49 | ```shell 50 | rustup target add wasm32-wasi 51 | ``` 52 | 53 | Build: 54 | 55 | ```shell 56 | cargo build --release --target wasm32-wasi 57 | ``` 58 | 59 | You will find your freshly built custom wasm transformer here: 60 | 61 | `target/wasm32-wasi/release/my-custom-wasm-transformer.wasm` 62 | 63 | The only thing that's left is to edit the `path` option in `replibyte.yaml`: 64 | 65 | ```yaml 66 | # ... 67 | - database: 68 | table: 69 | columns: 70 | - name: 71 | transformer_name: custom-wasm 72 | transformer_options: 73 | path: "path/to/your/custom-wasm-transformer.wasm" 74 | # ... 75 | ``` 76 | 77 | That's it! 78 | 79 | -------------------------------------------------------------------------------- /website/docs/guides/2-restore-a-dump.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: II. 
Restore a dump 3 | sidebar_position: 2 4 | --- 5 | 6 | # Restore a dump 7 | 8 | :::note 9 | 10 | I assume you did the previous guide, and you have your final `conf.yaml` file 11 | 12 | ::: 13 | 14 | On the last step, we have created and uploaded our transformed dump in our S3 Datastore. Now, we are ready to restore it in a development database. 15 | 16 | :::note 17 | 18 | The database where you restore must be the same type of the source where you dump. If you created a PostgreSQL dump, then you must restore on a PostgreSQL database. 19 | 20 | ::: 21 | 22 | Replibyte provides you two options to restore a dump: 23 | 24 | * **Option 1**: Locally - which is convenient for local development 25 | * Example use cases: 26 | * You develop an app locally and wants to work with real data. 27 | * You want to inspect what the transformed dump looks like. 28 | * **Option 2**: Remote - which is convenient to restore a remote database. 29 | * Example use cases: 30 | * You have a dump on your local machine, and you want to restore a database only accessible from a specific network. 31 | * You have no access to the dumps, only an admin can restore them. 32 | 33 | ## Option 1: Locally 34 | 35 | ### With Docker 36 | 37 | :::caution 38 | 39 | [Docker](https://www.docker.com/) must be installed and running 40 | 41 | ::: 42 | 43 | It's the best option to develop locally with a consistent transformed dump coming from your production data. Execute the following command to restore in a local Docker instance the latest dump: 44 | 45 | ```shell 46 | replibyte -c conf.yaml dump restore local -d postgresql -v latest 47 | ``` 48 | 49 | `-d` parameter accepts `mongodb`, `mysql` and other databases supported by Replibyte. 
50 | 51 | You can also list the available dumps with: 52 | 53 | ```shell 54 | replibyte -c conf.yaml dump list 55 | 56 | type name size when compressed encrypted 57 | PostgreSQL dump-1647706359405 154MB Yesterday at 03:00 am true true 58 | PostgreSQL dump-1647731334517 152MB 2 days ago at 03:00 am true true 59 | PostgreSQL dump-1647734369306 149MB 3 days ago at 03:00 am true true 60 | ``` 61 | 62 | And restore the dump you want with: 63 | 64 | ```shell 65 | replibyte -c conf.yaml dump restore local -d postgres -v dump-1647731334517 66 | ``` 67 | 68 | ### In a file 69 | 70 | You might want to inspect what you have in your dump and restore it manually. To do so, you can execute the same restore command but with the `-o` parameter: 71 | 72 | ```shell 73 | replibyte -c conf.yaml dump restore local -d postgres -v latest -o > dump.sql 74 | ``` 75 | 76 | ## Option 2: Remote 77 | 78 | To restore on a remote database, you need to specify the destination connection URI in your `conf.yaml`: 79 | 80 | ```yaml title="conf.yaml" 81 | destination: 82 | connection_uri: postgres://user:password@host:port/db 83 | # Disable public's schema wipe 84 | # wipe_database: false (default: true) 85 | ``` 86 | 87 | and run the following command: 88 | 89 | ```shell 90 | replibyte -c conf.yaml dump restore remote -v latest 91 | ``` 92 | 93 | --- 94 | 95 | You now know how to restore your transformed dump via multiple options, and even choose which version you want to restore. 96 | 97 | But now, **what happens if your database is very large?** In the next guide, you will learn how to downscale your database from a large size to a more reasonable one, while keeping it consistent.
➡️ 98 | -------------------------------------------------------------------------------- /docker-compose-dev.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | source-postgres: 5 | image: postgres:13 6 | restart: always 7 | healthcheck: 8 | test: ['CMD', 'pg_isready', '-q', '-d', 'postgres', '-U', 'root'] 9 | timeout: 45s 10 | interval: 10s 11 | retries: 10 12 | environment: 13 | - POSTGRES_USER=root 14 | - POSTGRES_PASSWORD=password 15 | volumes: 16 | - ./db/postgres:/docker-entrypoint-initdb.d/ 17 | ports: 18 | - 5432:5432 19 | dest-postgres: 20 | image: postgres:13 21 | restart: always 22 | healthcheck: 23 | test: ['CMD', 'pg_isready', '-q', '-d', 'postgres', '-U', 'root'] 24 | timeout: 45s 25 | interval: 10s 26 | retries: 10 27 | environment: 28 | - POSTGRES_USER=root 29 | - POSTGRES_PASSWORD=password 30 | ports: 31 | - 5453:5432 32 | source-mysql: 33 | image: mysql:8 34 | restart: always 35 | command: --default-authentication-plugin=mysql_native_password 36 | healthcheck: 37 | test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password'] 38 | timeout: 45s 39 | interval: 10s 40 | retries: 10 41 | environment: 42 | - MYSQL_ROOT_PASSWORD=password 43 | volumes: 44 | - ./db/mysql:/docker-entrypoint-initdb.d 45 | ports: 46 | - 3306:3306 47 | dest-mysql: 48 | image: mysql:8 49 | restart: always 50 | command: --default-authentication-plugin=mysql_native_password 51 | healthcheck: 52 | test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password'] 53 | timeout: 45s 54 | interval: 10s 55 | retries: 10 56 | environment: 57 | - MYSQL_ROOT_PASSWORD=password 58 | ports: 59 | - 3307:3306 60 | source-mongodb: 61 | image: mongo:5 62 | restart: always 63 | healthcheck: 64 | test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet 65 | timeout: 45s 66 | interval: 10s 67 | retries: 10 68 | environment: 69 | - MONGO_INITDB_ROOT_USERNAME=root 70 | - 
MONGO_INITDB_ROOT_PASSWORD=password 71 | volumes: 72 | - ./db/mongodb:/docker-entrypoint-initdb.d/ 73 | ports: 74 | - 27017:27017 75 | dest-mongodb: 76 | image: mongo:5 77 | restart: always 78 | healthcheck: 79 | test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet 80 | timeout: 45s 81 | interval: 10s 82 | retries: 10 83 | environment: 84 | - MONGO_INITDB_ROOT_USERNAME=root 85 | - MONGO_INITDB_ROOT_PASSWORD=password 86 | ports: 87 | - 27018:27017 88 | bridge-minio: 89 | image: minio/minio:RELEASE.2022-03-17T06-34-49Z 90 | restart: always 91 | command: server --console-address ":9001" /data/minio/ 92 | healthcheck: 93 | test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] 94 | interval: 30s 95 | timeout: 20s 96 | retries: 3 97 | environment: 98 | MINIO_ROOT_USER: minioadmin 99 | MINIO_ROOT_PASSWORD: minioadmin 100 | ports: 101 | - 9000:9000 102 | - 9001:9001 103 | create-minio-bucket: 104 | image: minio/mc 105 | depends_on: 106 | - bridge-minio 107 | entrypoint: > 108 | /bin/sh -c " 109 | /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin; 110 | /usr/bin/mc rm -r --force myminio/replibyte-test; 111 | /usr/bin/mc mb myminio/replibyte-test; 112 | /usr/bin/mc policy download myminio/replibyte-test; 113 | exit 0; 114 | " 115 | -------------------------------------------------------------------------------- /subset/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use std::io::Error; 3 | 4 | mod dedup; 5 | pub mod postgres; 6 | mod utils; 7 | 8 | pub type Bytes = Vec; 9 | 10 | pub trait Subset { 11 | fn read(&self, data: F, progress: P) 12 | -> Result<(), Error>; 13 | } 14 | 15 | pub struct Progress { 16 | // total data rows 17 | pub total_rows: usize, 18 | // total rows to processed 19 | pub total_rows_to_process: usize, 20 | // rows processed 21 | pub processed_rows: usize, 22 | // last row processed exec time 23 | 
pub last_process_time: u128, 24 | } 25 | 26 | impl Progress { 27 | pub fn percent(&self) -> u8 { 28 | ((self.processed_rows as f64 / self.total_rows_to_process as f64) * 100.0) as u8 29 | } 30 | } 31 | 32 | #[derive(Debug, Hash, Eq, PartialEq)] 33 | pub struct PassthroughTable<'a> { 34 | pub database: &'a str, 35 | pub table: &'a str, 36 | } 37 | 38 | impl<'a> PassthroughTable<'a> { 39 | pub fn new>(database: S, table: S) -> Self { 40 | PassthroughTable { 41 | database: database.into(), 42 | table: table.into(), 43 | } 44 | } 45 | } 46 | 47 | pub struct SubsetOptions<'a> { 48 | pub passthrough_tables: &'a HashSet>, 49 | } 50 | 51 | impl<'a> SubsetOptions<'a> { 52 | pub fn new(passthrough_tables: &'a HashSet>) -> Self { 53 | SubsetOptions { passthrough_tables } 54 | } 55 | } 56 | 57 | #[derive(Debug, Hash, Eq, PartialEq, Clone)] 58 | pub struct SubsetTable { 59 | pub database: String, 60 | pub table: String, 61 | pub relations: Vec, 62 | } 63 | 64 | impl SubsetTable { 65 | pub fn new>( 66 | database: S, 67 | table: S, 68 | relations: Vec, 69 | ) -> Self { 70 | SubsetTable { 71 | database: database.into(), 72 | table: table.into(), 73 | relations, 74 | } 75 | } 76 | 77 | pub fn related_tables(&self) -> HashSet<&str> { 78 | self.relations 79 | .iter() 80 | .map(|r| r.table.as_str()) 81 | .collect::>() 82 | } 83 | 84 | pub fn find_related_subset_tables<'a>( 85 | &self, 86 | subset_tables: &'a Vec<&SubsetTable>, 87 | ) -> Vec<&'a SubsetTable> { 88 | if subset_tables.is_empty() { 89 | return Vec::new(); 90 | } 91 | 92 | let related_tables = self.related_tables(); 93 | 94 | subset_tables 95 | .iter() 96 | .filter(|subset_table| related_tables.contains(subset_table.table.as_str())) 97 | .map(|subset_table| *subset_table) 98 | .collect::>() 99 | } 100 | } 101 | 102 | /// Representing a query where... 
103 | /// database -> is the targeted database 104 | /// table -> is the targeted table 105 | /// from_property is the parent table property referencing the target table `to_property` 106 | #[derive(Debug, Hash, Eq, PartialEq, Clone)] 107 | pub struct SubsetTableRelation { 108 | pub database: String, 109 | pub table: String, 110 | pub from_property: String, 111 | pub to_property: String, 112 | } 113 | 114 | impl SubsetTableRelation { 115 | pub fn new>(database: S, table: S, from_property: S, to_property: S) -> Self { 116 | SubsetTableRelation { 117 | database: database.into(), 118 | table: table.into(), 119 | from_property: from_property.into(), 120 | to_property: to_property.into(), 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /replibyte/src/destination/mysql_docker.rs: -------------------------------------------------------------------------------- 1 | use crate::connector::Connector; 2 | use crate::destination::docker::{ 3 | daemon_is_running, Container, ContainerOptions, Image, DOCKER_BINARY_NAME, 4 | }; 5 | use crate::destination::Destination; 6 | use crate::types::Bytes; 7 | use crate::utils::binary_exists; 8 | use std::io::{Error, ErrorKind, Write}; 9 | 10 | const DEFAULT_MYSQL_IMAGE: &str = "mysql"; 11 | pub const DEFAULT_MYSQL_IMAGE_TAG: &str = "8"; 12 | pub const DEFAULT_MYSQL_CONTAINER_PORT: u16 = 3306; 13 | const DEFAULT_MYSQL_PASSWORD: &str = "password"; 14 | 15 | pub struct MysqlDocker { 16 | pub image: Image, 17 | pub options: ContainerOptions, 18 | pub container: Option, 19 | } 20 | 21 | impl MysqlDocker { 22 | pub fn new(tag: String, port: u16) -> Self { 23 | Self { 24 | image: Image { 25 | name: DEFAULT_MYSQL_IMAGE.to_string(), 26 | tag, 27 | }, 28 | options: ContainerOptions { 29 | host_port: port, 30 | container_port: DEFAULT_MYSQL_CONTAINER_PORT, 31 | }, 32 | container: None, 33 | } 34 | } 35 | } 36 | 37 | impl Connector for MysqlDocker { 38 | fn init(&mut self) -> Result<(), 
Error> { 39 | let _ = binary_exists(DOCKER_BINARY_NAME)?; 40 | let _ = daemon_is_running()?; 41 | 42 | let password_env = format!("MYSQL_ROOT_PASSWORD={}", DEFAULT_MYSQL_PASSWORD); 43 | let container = Container::new( 44 | &self.image, 45 | &self.options, 46 | vec!["-e", password_env.as_str()], 47 | Some(vec![ 48 | "mysqld", 49 | "--default-authentication-plugin=mysql_native_password", 50 | ]), 51 | )?; 52 | 53 | self.container = Some(container); 54 | Ok(()) 55 | } 56 | } 57 | 58 | impl Destination for MysqlDocker { 59 | fn write(&self, data: Bytes) -> Result<(), Error> { 60 | match &self.container { 61 | Some(container) => { 62 | let mut container_exec = 63 | container.exec("exec mysql -uroot -p\"$MYSQL_ROOT_PASSWORD\"")?; 64 | let _ = container_exec 65 | .stdin 66 | .take() 67 | .unwrap() 68 | .write_all(data.as_slice()); 69 | 70 | let exit_status = container_exec.wait()?; 71 | if !exit_status.success() { 72 | return Err(Error::new( 73 | ErrorKind::Other, 74 | format!("command error: {:?}", exit_status.to_string()), 75 | )); 76 | } 77 | 78 | Ok(()) 79 | } 80 | None => Err(Error::new( 81 | ErrorKind::Other, 82 | "command error: cannot retrieve container", 83 | )), 84 | } 85 | } 86 | } 87 | 88 | #[cfg(test)] 89 | mod tests { 90 | use super::MysqlDocker; 91 | use crate::connector::Connector; 92 | use crate::destination::Destination; 93 | 94 | fn get_mysql() -> MysqlDocker { 95 | MysqlDocker::new("8".to_string(), 3308) 96 | } 97 | 98 | fn get_invalid_mysql() -> MysqlDocker { 99 | MysqlDocker::new("bad_tag".to_string(), 3308) 100 | } 101 | 102 | #[test] 103 | fn connect() { 104 | let mut p = get_mysql(); 105 | let _ = p.init().expect("can't init mysql"); 106 | assert!(p.write(b"SELECT 1".to_vec()).is_ok()); 107 | 108 | // cleanup container 109 | let _ = p.container.unwrap().rm(); 110 | 111 | let mut p = get_invalid_mysql(); 112 | assert!(p.init().is_err()); 113 | assert!(p.write(b"SELECT 1".to_vec()).is_err()); 114 | } 115 | } 116 | 
-------------------------------------------------------------------------------- /replibyte/src/destination/mongodb.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, Write}; 2 | use std::process::{Command, Stdio}; 3 | 4 | use crate::connector::Connector; 5 | use crate::destination::Destination; 6 | use crate::types::Bytes; 7 | use crate::utils::{binary_exists, wait_for_command}; 8 | 9 | pub struct MongoDB<'a> { 10 | uri: &'a str, 11 | database: &'a str, 12 | } 13 | 14 | impl<'a> MongoDB<'a> { 15 | pub fn new(uri: &'a str, database: &'a str) -> Self { 16 | MongoDB { uri, database } 17 | } 18 | } 19 | 20 | impl<'a> Connector for MongoDB<'a> { 21 | fn init(&mut self) -> Result<(), Error> { 22 | let _ = binary_exists("mongosh")?; 23 | let _ = binary_exists("mongorestore")?; 24 | let _ = check_connection_status(self)?; 25 | 26 | Ok(()) 27 | } 28 | } 29 | 30 | impl<'a> Destination for MongoDB<'a> { 31 | fn write(&self, data: Bytes) -> Result<(), Error> { 32 | let mut process = Command::new("mongorestore") 33 | .args([ 34 | "--uri", 35 | self.uri, 36 | format!("--nsFrom='{}.*'", self.database).as_str(), 37 | format!("--nsTo='{}.*'", self.database).as_str(), 38 | "--archive", 39 | ]) 40 | .stdin(Stdio::piped()) 41 | .stdout(Stdio::null()) 42 | .spawn()?; 43 | 44 | let _ = process 45 | .stdin 46 | .take() 47 | .unwrap() 48 | .write_all(&data[..data.len() - 1]); // remove trailing null terminator, or else mongorestore will fail 49 | 50 | wait_for_command(&mut process) 51 | } 52 | } 53 | 54 | fn check_connection_status(db: &MongoDB) -> Result<(), Error> { 55 | let mut echo_process = Command::new("echo") 56 | .arg(r#"'db.runCommand("ping").ok'"#) 57 | .stdout(Stdio::piped()) 58 | .spawn()?; 59 | 60 | let mut mongo_process = Command::new("mongosh") 61 | .args([db.uri, "--quiet"]) 62 | .stdin(echo_process.stdout.take().unwrap()) 63 | .stdout(Stdio::inherit()) 64 | .spawn()?; 65 | 66 | wait_for_command(&mut 
mongo_process) 67 | } 68 | 69 | #[cfg(test)] 70 | mod tests { 71 | use dump_parser::utils::decode_hex; 72 | 73 | use crate::connector::Connector; 74 | use crate::destination::mongodb::MongoDB; 75 | use crate::destination::Destination; 76 | 77 | fn get_mongodb() -> MongoDB<'static> { 78 | MongoDB::new("mongodb://root:password@localhost:27018", "test") 79 | } 80 | 81 | fn get_invalid_mongodb() -> MongoDB<'static> { 82 | MongoDB::new("mongodb://root:wrongpassword@localhost:27018", "test") 83 | } 84 | 85 | #[test] 86 | fn connect() { 87 | let mut p = get_mongodb(); 88 | let _ = p.init().expect("can't init mongodb"); 89 | let bytes = decode_hex("6de299816600000010636f6e63757272656e745f636f6c6c656374696f6e7300040000000276657273696f6e0004000000302e3100027365727665725f76657273696f6e0006000000352e302e360002746f6f6c5f76657273696f6e00080000003130302e352e320000020100000264620005000000746573740002636f6c6c656374696f6e0006000000757365727300026d6574616461746100ad0000007b22696e6465786573223a5b7b2276223a7b22246e756d626572496e74223a2232227d2c226b6579223a7b225f6964223a7b22246e756d626572496e74223a2231227d7d2c226e616d65223a225f69645f227d5d2c2275756964223a223464363734323637316333613463663938316439386164373831343735333234222c22636f6c6c656374696f6e4e616d65223a227573657273222c2274797065223a22636f6c6c656374696f6e227d001073697a6500000000000274797065000b000000636f6c6c656374696f6e0000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600001243524300000000000000000000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600011243524300000000000000000000ffffffff00").unwrap(); 90 | assert!(p.write(bytes.to_vec()).is_ok()); 91 | 92 | let mut p = get_invalid_mongodb(); 93 | assert!(p.init().is_err()); 94 | assert!(p.write(bytes.to_vec()).is_err()); 95 | } 96 | //TODO add more tests 97 | } 98 | -------------------------------------------------------------------------------- /replibyte/src/tasks/full_dump.rs: 
-------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind}; 2 | use std::sync::mpsc; 3 | use std::thread; 4 | 5 | use crate::datastore::Datastore; 6 | use crate::source::SourceOptions; 7 | use crate::tasks::{MaxBytes, Message, Task, TransferredBytes}; 8 | use crate::types::{to_bytes, Queries}; 9 | use crate::Source; 10 | 11 | type DataMessage = (u16, Queries); 12 | 13 | /// FullDumpTask is a wrapping struct to execute the synchronization between a *Source* and a *Datastore* 14 | pub struct FullDumpTask<'a, S> 15 | where 16 | S: Source, 17 | { 18 | source: S, 19 | datastore: Box, 20 | options: SourceOptions<'a>, 21 | } 22 | 23 | impl<'a, S> FullDumpTask<'a, S> 24 | where 25 | S: Source, 26 | { 27 | pub fn new(source: S, datastore: Box, options: SourceOptions<'a>) -> Self { 28 | FullDumpTask { 29 | source, 30 | datastore, 31 | options, 32 | } 33 | } 34 | } 35 | 36 | impl<'a, S> Task for FullDumpTask<'a, S> 37 | where 38 | S: Source, 39 | { 40 | fn run( 41 | mut self, 42 | mut progress_callback: F, 43 | ) -> Result<(), Error> { 44 | // initialize the source 45 | let _ = self.source.init()?; 46 | 47 | let (tx, rx) = mpsc::sync_channel::>(1); 48 | let datastore = self.datastore; 49 | 50 | let join_handle = thread::spawn(move || -> Result<(), Error> { 51 | // managing Datastore (S3) upload here 52 | let datastore = datastore; 53 | 54 | loop { 55 | let result = match rx.recv() { 56 | Ok(Message::Data((chunk_part, queries))) => Ok((chunk_part, queries)), 57 | Ok(Message::EOF) => break, 58 | Err(err) => Err(Error::new(ErrorKind::Other, format!("{}", err))), 59 | }; 60 | 61 | if let Ok((chunk_part, queries)) = result { 62 | let _ = match datastore.write(chunk_part, to_bytes(queries)) { 63 | Ok(_) => {} 64 | Err(err) => return Err(Error::new(ErrorKind::Other, format!("{}", err))), 65 | }; 66 | } 67 | } 68 | 69 | Ok(()) 70 | }); 71 | 72 | // buffer of 100MB in memory to use and re-use to upload data into datastore 73 | 
let buffer_size = 100 * 1024 * 1024; 74 | let mut queries = vec![]; 75 | let mut consumed_buffer_size = 0usize; 76 | let mut total_transferred_bytes = 0usize; 77 | let mut chunk_part = 0u16; 78 | 79 | // init progress 80 | progress_callback( 81 | total_transferred_bytes, 82 | buffer_size * (chunk_part as usize + 1), 83 | ); 84 | 85 | let _ = self.source.read(self.options, |_original_query, query| { 86 | if consumed_buffer_size + query.data().len() > buffer_size { 87 | chunk_part += 1; 88 | consumed_buffer_size = 0; 89 | // TODO .clone() - look if we do not consume more mem 90 | 91 | let message = Message::Data((chunk_part, queries.clone())); 92 | 93 | let _ = tx.send(message); // FIXME catch SendError? 94 | let _ = queries.clear(); 95 | } 96 | 97 | consumed_buffer_size += query.data().len(); 98 | total_transferred_bytes += query.data().len(); 99 | progress_callback( 100 | total_transferred_bytes, 101 | buffer_size * (chunk_part as usize + 1), 102 | ); 103 | queries.push(query); 104 | })?; 105 | 106 | progress_callback(total_transferred_bytes, total_transferred_bytes); 107 | 108 | chunk_part += 1; 109 | let _ = tx.send(Message::Data((chunk_part, queries))); 110 | let _ = tx.send(Message::EOF); 111 | // wait for end of upload execution 112 | join_handle.join().unwrap()?; 113 | 114 | Ok(()) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /replibyte/src/destination/postgres_docker.rs: -------------------------------------------------------------------------------- 1 | use crate::connector::Connector; 2 | use crate::destination::docker::{ 3 | daemon_is_running, Container, ContainerOptions, Image, DOCKER_BINARY_NAME, 4 | }; 5 | use crate::destination::Destination; 6 | use crate::types::Bytes; 7 | use crate::utils::binary_exists; 8 | use std::io::{Error, ErrorKind, Write}; 9 | 10 | const DEFAULT_POSTGRES_IMAGE: &str = "postgres"; 11 | pub const DEFAULT_POSTGRES_IMAGE_TAG: &str = "13"; 12 | pub const 
DEFAULT_POSTGRES_CONTAINER_PORT: u16 = 5432; 13 | pub const DEFAULT_POSTGRES_USER: &str = "postgres"; 14 | pub const DEFAULT_POSTGRES_PASSWORD: &str = "password"; 15 | pub const DEFAULT_POSTGRES_DB: &str = "postgres"; 16 | 17 | pub struct PostgresDocker { 18 | pub image: Image, 19 | pub options: ContainerOptions, 20 | pub container: Option, 21 | } 22 | 23 | impl PostgresDocker { 24 | pub fn new(tag: String, port: u16) -> Self { 25 | Self { 26 | image: Image { 27 | name: DEFAULT_POSTGRES_IMAGE.to_string(), 28 | tag, 29 | }, 30 | options: ContainerOptions { 31 | host_port: port, 32 | container_port: DEFAULT_POSTGRES_CONTAINER_PORT, 33 | }, 34 | container: None, 35 | } 36 | } 37 | } 38 | 39 | impl Connector for PostgresDocker { 40 | fn init(&mut self) -> Result<(), Error> { 41 | let _ = binary_exists(DOCKER_BINARY_NAME)?; 42 | let _ = daemon_is_running()?; 43 | 44 | let password_env = format!("POSTGRES_PASSWORD={}", DEFAULT_POSTGRES_PASSWORD); 45 | let user_env = format!("POSTGRES_USER={}", DEFAULT_POSTGRES_USER); 46 | let container = Container::new( 47 | &self.image, 48 | &self.options, 49 | vec!["-e", password_env.as_str(), "-e", user_env.as_str()], 50 | None, 51 | )?; 52 | 53 | self.container = Some(container); 54 | Ok(()) 55 | } 56 | } 57 | 58 | impl Destination for PostgresDocker { 59 | fn write(&self, data: Bytes) -> Result<(), Error> { 60 | let cmd = format!( 61 | "PGPASSWORD={} psql --username {} {}", 62 | DEFAULT_POSTGRES_PASSWORD, DEFAULT_POSTGRES_USER, DEFAULT_POSTGRES_DB 63 | ); 64 | 65 | match &self.container { 66 | Some(container) => { 67 | let mut container_exec = container.exec(&cmd)?; 68 | let _ = container_exec 69 | .stdin 70 | .take() 71 | .unwrap() 72 | .write_all(data.as_slice()); 73 | 74 | let exit_status = container_exec.wait()?; 75 | if !exit_status.success() { 76 | return Err(Error::new( 77 | ErrorKind::Other, 78 | format!("command error: {:?}", exit_status.to_string()), 79 | )); 80 | } 81 | 82 | Ok(()) 83 | } 84 | None => Err(Error::new( 85 | 
// @ts-check
// Note: type annotations allow type checking and IDEs autocompletion

const lightCodeTheme = require('prism-react-renderer/themes/github');
const darkCodeTheme = require('prism-react-renderer/themes/dracula');

// Classic preset options (docs, blog and theme), extracted for readability.
/** @type {import('@docusaurus/preset-classic').Options} */
const presetOptions = {
  docs: {
    sidebarPath: require.resolve('./sidebars.js'),
    editUrl: 'https://github.com/Qovery/replibyte/tree/main/website/',
    remarkPlugins: [require('mdx-mermaid')],
  },
  blog: {
    showReadingTime: true,
    editUrl: 'https://github.com/Qovery/replibyte/tree/main/website/',
  },
  theme: {
    customCss: require.resolve('./src/css/custom.css'),
  },
};

// Top navigation bar.
const navbar = {
  title: 'Replibyte',
  logo: {
    alt: 'Replibyte Logo',
    src: 'img/logo.svg',
  },
  items: [
    {
      type: 'doc',
      docId: 'introduction',
      position: 'left',
      label: 'Documentation',
    },
    {
      href: 'https://discord.qovery.com',
      label: 'Discord',
      position: 'left',
    },
    {
      href: 'https://www.qovery.com',
      label: 'Replibyte Cloud ⚡️',
      position: 'right',
    },
    {
      href: 'https://github.com/Qovery/replibyte',
      label: 'GitHub',
      position: 'right',
    },
  ],
};

// Footer link columns.
const footer = {
  //style: 'dark',
  links: [
    {
      title: 'Docs',
      items: [
        {
          label: 'Tutorial',
          to: '/docs/introduction',
        },
      ],
    },
    {
      title: 'Community',
      items: [
        {
          label: 'Discord',
          href: 'https://discord.qovery.com',
        },
        {
          label: 'Twitter',
          href: 'https://twitter.com/Qovery_',
        },
      ],
    },
    {
      title: 'More',
      items: [
        {
          label: 'GitHub',
          href: 'https://github.com/Qovery/replibyte',
        },
        {
          label: 'Qovery',
          href: 'https://www.qovery.com',
        },
      ],
    },
  ],
  copyright: `Copyright © ${new Date().getFullYear()} Replibyte by Qovery`,
};

/** @type {import('@docusaurus/types').Config} */
const config = {
  title: 'Replibyte',
  tagline: 'Seed your dev database with real data',
  url: 'https://www.replibyte.com',
  baseUrl: '/',
  onBrokenLinks: 'throw',
  onBrokenMarkdownLinks: 'warn',
  favicon: 'img/favicon.ico',
  organizationName: 'Qovery', // Usually your GitHub org/user name.
  projectName: 'replibyte', // Usually your repo name.

  plugins: [require.resolve('@cmfcmf/docusaurus-search-local')],
  presets: [['@docusaurus/preset-classic', presetOptions]],

  themeConfig:
    /** @type {import('@docusaurus/preset-classic').ThemeConfig} */
    ({
      metadata: [
        {
          name: 'keywords',
          content: 'seed database, postgresql, postgres, mysql, mongodb, database, preview environment',
        },
      ],
      navbar,
      footer,
      prism: {
        theme: lightCodeTheme,
        darkTheme: darkCodeTheme,
        additionalLanguages: ['rust', 'yaml', 'bash'],
      },
      colorMode: {
        defaultMode: 'dark',
      },
    }),
};

module.exports = config;
| label: 'Qovery', 112 | href: 'https://www.qovery.com', 113 | }, 114 | ], 115 | }, 116 | ], 117 | copyright: `Copyright © ${new Date().getFullYear()} Replibyte by Qovery`, 118 | }, 119 | prism: { 120 | theme: lightCodeTheme, 121 | darkTheme: darkCodeTheme, 122 | additionalLanguages: ['rust', 'yaml', 'bash'] 123 | }, 124 | colorMode: { 125 | defaultMode: 'dark' 126 | } 127 | }), 128 | }; 129 | 130 | module.exports = config; 131 | -------------------------------------------------------------------------------- /replibyte/src/destination/postgres.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind, Write}; 2 | use std::process::{Command, Stdio}; 3 | 4 | use crate::connector::Connector; 5 | use crate::destination::Destination; 6 | use crate::types::Bytes; 7 | use crate::utils::{binary_exists, wait_for_command}; 8 | 9 | pub struct Postgres<'a> { 10 | host: &'a str, 11 | port: u16, 12 | database: &'a str, 13 | username: &'a str, 14 | password: &'a str, 15 | wipe_database: bool, 16 | } 17 | 18 | impl<'a> Postgres<'a> { 19 | pub fn new( 20 | host: &'a str, 21 | port: u16, 22 | database: &'a str, 23 | username: &'a str, 24 | password: &'a str, 25 | wipe_database: bool, 26 | ) -> Self { 27 | Postgres { 28 | host, 29 | port, 30 | database, 31 | username, 32 | password, 33 | wipe_database, 34 | } 35 | } 36 | } 37 | 38 | impl<'a> Connector for Postgres<'a> { 39 | fn init(&mut self) -> Result<(), Error> { 40 | let _ = binary_exists("psql")?; 41 | 42 | if self.wipe_database { 43 | let s_port = self.port.to_string(); 44 | let wipe_db_query = wipe_database_query(self.username); 45 | 46 | let exit_status = Command::new("psql") 47 | .env("PGPASSWORD", self.password) 48 | .args([ 49 | "-h", 50 | self.host, 51 | "-p", 52 | s_port.as_str(), 53 | "-d", 54 | self.database, 55 | "-U", 56 | self.username, 57 | "-c", 58 | wipe_db_query.as_str(), 59 | ]) 60 | .stdout(Stdio::null()) 61 | .spawn()? 
62 | .wait()?; 63 | 64 | if !exit_status.success() { 65 | return Err(Error::new( 66 | ErrorKind::Other, 67 | format!("command error: {:?}", exit_status.to_string()), 68 | )); 69 | } 70 | } 71 | 72 | Ok(()) 73 | } 74 | } 75 | 76 | impl<'a> Destination for Postgres<'a> { 77 | fn write(&self, data: Bytes) -> Result<(), Error> { 78 | let s_port = self.port.to_string(); 79 | 80 | let mut process = Command::new("psql") 81 | .env("PGPASSWORD", self.password) 82 | .args([ 83 | "-h", 84 | self.host, 85 | "-p", 86 | s_port.as_str(), 87 | "-d", 88 | self.database, 89 | "-U", 90 | self.username, 91 | ]) 92 | .stdin(Stdio::piped()) 93 | .stdout(Stdio::null()) 94 | .spawn()?; 95 | 96 | let _ = process.stdin.take().unwrap().write_all(data.as_slice()); 97 | 98 | wait_for_command(&mut process) 99 | } 100 | } 101 | 102 | fn wipe_database_query(username: &str) -> String { 103 | format!( 104 | "\ 105 | DROP SCHEMA public CASCADE; \ 106 | CREATE SCHEMA public; \ 107 | GRANT ALL ON SCHEMA public TO \"{}\"; \ 108 | GRANT ALL ON SCHEMA public TO public;\ 109 | ", 110 | username 111 | ) 112 | } 113 | 114 | #[cfg(test)] 115 | mod tests { 116 | use crate::connector::Connector; 117 | use crate::destination::postgres::Postgres; 118 | use crate::destination::Destination; 119 | 120 | fn get_postgres() -> Postgres<'static> { 121 | Postgres::new("localhost", 5453, "root", "root", "password", true) 122 | } 123 | 124 | fn get_invalid_postgres() -> Postgres<'static> { 125 | Postgres::new("localhost", 5453, "root", "root", "wrongpassword", true) 126 | } 127 | 128 | #[test] 129 | fn connect() { 130 | let mut p = get_postgres(); 131 | let _ = p.init().expect("can't init postgres"); 132 | assert!(p.write(b"SELECT 1".to_vec()).is_ok()); 133 | 134 | let mut p = get_invalid_postgres(); 135 | assert!(p.init().is_err()); 136 | assert!(p.write(b"SELECT 1".to_vec()).is_err()); 137 | } 138 | 139 | #[test] 140 | fn test_inserts() {} 141 | } 142 | 
-------------------------------------------------------------------------------- /website/docs/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | description: Replibyte is a tool to seed your development database with your production data while keeping sensitive data safe 4 | hide_title: true 5 | sidebar_position: 1 6 | --- 7 | 8 | import ThemedImage from '@theme/ThemedImage'; 9 | import useBaseUrl from "@docusaurus/useBaseUrl"; 10 | 11 | 18 | 19 | Replibyte is a blazingly fast tool to seed your databases with your production data while keeping sensitive data safe ⚡️ 20 | 21 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 22 | ![stability badge](https://img.shields.io/badge/stability-stable-green.svg?style=flat-square) 23 | ![build and tests badge](https://github.com/Qovery/replibyte/actions/workflows/build-and-test.yml/badge.svg?style=flat-square) 24 | [![discord server](https://img.shields.io/discord/688766934917185556?label=discord&style=flat-square)](https://discord.qovery.com) 25 | 26 | ## Prerequisites 27 | 28 | - MacOSX / Linux / Windows 29 | - Nothing more! Replibyte is stateless and does not require anything special. 
30 | 31 | ## Usage 32 | 33 | Create a dump 34 | 35 | ```shell 36 | replibyte -c conf.yaml dump create 37 | ``` 38 | 39 | List all dumps 40 | 41 | ```shell 42 | replibyte -c conf.yaml dump list 43 | 44 | type name size when compressed encrypted 45 | PostgreSQL dump-1647706359405 154MB Yesterday at 03:00 am true true 46 | PostgreSQL dump-1647731334517 152MB 2 days ago at 03:00 am true true 47 | PostgreSQL dump-1647734369306 149MB 3 days ago at 03:00 am true true 48 | ``` 49 | 50 | Restore the latest dump in a local container 51 | 52 | ```shell 53 | replibyte -c conf.yaml dump restore local -v latest -i postgres -p 5432 54 | ``` 55 | 56 | Restore the latest dump in a remote database 57 | 58 | ```shell 59 | replibyte -c conf.yaml dump restore remote -v latest 60 | ``` 61 | 62 | ## Features 63 | 64 | - [x] Support data backup and restore for PostgreSQL, MySQL and MongoDB 65 | - [x] Replace sensitive data with fake data 66 | - [x] Works on large database (> 10GB) 67 | - [x] Database Subsetting: Scale down a production database to a more reasonable size 🔥 68 | - [x] Start a local database with the prod data in a single command 🔥 69 | - [x] On-the-fly data (de)compression (Zlib) 70 | - [x] On-the-fly data de/encryption (AES-256) 71 | - [x] Fully stateless (no server, no daemon) and lightweight binary 🍃 72 | - [x] Use custom transformers 73 | 74 | Here are the features we plan to support 75 | 76 | - [ ] Auto-detect and version database schema change 77 | - [ ] Auto-detect sensitive fields 78 | - [ ] Auto-clean backed up data 79 | 80 | ## Getting Started 81 | 82 | * [How Replibyte works](/docs/how-replibyte-works) 83 | * Initial setup: 84 | 1. [Install](/docs/getting-started/installation) 85 | 2. [Configure](/docs/getting-started/configuration) 86 | * Step-by-step guides: 87 | 1. [Create a dump](/docs/guides/create-a-dump) 88 | 2. [Restore a dump](/docs/guides/restore-a-dump) 89 | 3. [Subset a dump](/docs/guides/subset-a-dump) 90 | 4. 
[Delete a dump](/docs/guides/delete-a-dump) 91 | 5. Deploy Replibyte 92 | 1. [Container](https://www.replibyte.com/docs/guides/deploy-replibyte/container) 93 | 2. [Qovery](https://www.replibyte.com/docs/guides/deploy-replibyte/qovery) 94 | 95 | ## Demo 96 | 97 | [![What is RepliByte](../../assets/video_.png)](https://www.youtube.com/watch?v=IKeLnZvECQw) 98 | 99 | ## Motivation 100 | 101 | At [Qovery](https://www.qovery.com) (the company behind Replibyte), developers can clone their applications and databases just with one 102 | click. However, the cloning process can be tedious and time-consuming, and we end up copying the information multiple times. With RepliByte, 103 | the Qovery team wants to provide a comprehensive way to seed cloud databases from one place to another. 104 | 105 | The long-term motivation behind RepliByte is to provide a way to clone any database in real-time. This project starts small, but has big 106 | ambition! 107 | -------------------------------------------------------------------------------- /replibyte/src/transformer/email.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::Transformer; 2 | use crate::types::Column; 3 | use fake::faker::internet::raw::SafeEmail; 4 | use fake::locales::EN; 5 | use fake::Fake; 6 | 7 | /// This struct is dedicated to replacing a string by an email address. 
8 | pub struct EmailTransformer { 9 | database_name: String, 10 | table_name: String, 11 | column_name: String, 12 | } 13 | 14 | impl EmailTransformer { 15 | pub fn new(database_name: S, table_name: S, column_name: S) -> Self 16 | where 17 | S: Into, 18 | { 19 | EmailTransformer { 20 | database_name: database_name.into(), 21 | table_name: table_name.into(), 22 | column_name: column_name.into(), 23 | } 24 | } 25 | } 26 | 27 | impl Default for EmailTransformer { 28 | fn default() -> Self { 29 | EmailTransformer { 30 | database_name: String::default(), 31 | table_name: String::default(), 32 | column_name: String::default(), 33 | } 34 | } 35 | } 36 | 37 | impl Transformer for EmailTransformer { 38 | fn id(&self) -> &str { 39 | "email" 40 | } 41 | 42 | fn description(&self) -> &str { 43 | "Generate an email address (string only). [john.doe@company.com]->[tony.stark@avengers.com]" 44 | } 45 | 46 | fn database_name(&self) -> &str { 47 | self.database_name.as_str() 48 | } 49 | 50 | fn table_name(&self) -> &str { 51 | self.table_name.as_str() 52 | } 53 | 54 | fn column_name(&self) -> &str { 55 | self.column_name.as_str() 56 | } 57 | 58 | fn transform(&self, column: Column) -> Column { 59 | match column { 60 | Column::StringValue(column_name, value) => { 61 | let new_value = match value.len() { 62 | len if len == 0 => value, 63 | _ => SafeEmail(EN).fake(), 64 | }; 65 | 66 | Column::StringValue(column_name, new_value) 67 | } 68 | column => column, 69 | } 70 | } 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use crate::{transformer::Transformer, types::Column}; 76 | 77 | use super::EmailTransformer; 78 | 79 | #[test] 80 | fn transform_email_with_number_value() { 81 | let expected_value = 34; 82 | let transformer = get_transformer(); 83 | let column = Column::NumberValue("email".to_string(), expected_value); 84 | let transformed_column = transformer.transform(column); 85 | let transformed_value = transformed_column.number_value().unwrap(); 86 | 87 | 
assert_eq!(transformed_value.to_owned(), expected_value) 88 | } 89 | 90 | #[test] 91 | fn transform_email_with_float_value() { 92 | let expected_value = 1.5; 93 | let transformer = get_transformer(); 94 | let column = Column::FloatNumberValue("email".to_string(), expected_value); 95 | let transformed_column = transformer.transform(column); 96 | let transformed_value = transformed_column.float_number_value().unwrap(); 97 | 98 | assert_eq!(transformed_value.to_owned(), expected_value) 99 | } 100 | 101 | #[test] 102 | fn transform_email_with_empty_string_value() { 103 | let expected_value = ""; 104 | let transformer = get_transformer(); 105 | let column = Column::StringValue("email".to_string(), expected_value.to_string()); 106 | let transformed_column = transformer.transform(column); 107 | let transformed_value = transformed_column.string_value().unwrap(); 108 | 109 | assert_eq!(transformed_value, expected_value) 110 | } 111 | 112 | #[test] 113 | fn transform_email_with_string_value() { 114 | let transformer = get_transformer(); 115 | let column = Column::StringValue("email".to_string(), "john.doe@company.com".to_string()); 116 | let transformed_column = transformer.transform(column); 117 | let transformed_value = transformed_column.string_value().unwrap(); 118 | 119 | assert!(!transformed_value.is_empty()); 120 | assert_ne!(transformed_value, "john.doe@company.com".to_string()); 121 | } 122 | 123 | fn get_transformer() -> EmailTransformer { 124 | EmailTransformer::new("github", "users", "email") 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

replibyte logo

2 | 3 |

Seed Your Development Database With Real Data ⚡️

4 |

Replibyte is a blazingly fast tool to seed your databases with your production data while keeping sensitive data safe 🔥

5 | 6 |

7 | MIT License 8 | stable badge 9 | stable badge 10 | Build and Tests 11 | Discord 12 |

13 | 14 |

15 | 16 | ROSS Index - Fastest Growing Open-Source Startups in Q3 2022 | Runa Capital 17 | 18 | 19 |

20 | 21 | ## Prerequisites 22 | 23 | - MacOSX / Linux / Windows 24 | - Nothing more! Replibyte is stateless and does not require anything special. 25 | 26 | ## Usage 27 | 28 | Create a dump 29 | 30 | ```shell 31 | replibyte -c conf.yaml dump create 32 | ``` 33 | 34 | List all dumps 35 | 36 | ```shell 37 | replibyte -c conf.yaml dump list 38 | 39 | type name size when compressed encrypted 40 | PostgreSQL dump-1647706359405 154MB Yesterday at 03:00 am true true 41 | PostgreSQL dump-1647731334517 152MB 2 days ago at 03:00 am true true 42 | PostgreSQL dump-1647734369306 149MB 3 days ago at 03:00 am true true 43 | ``` 44 | 45 | Restore the latest dump in a local container 46 | 47 | ```shell 48 | replibyte -c conf.yaml dump restore local -v latest -i postgres -p 5432 49 | ``` 50 | 51 | Restore the latest dump in a remote database 52 | 53 | ```shell 54 | replibyte -c conf.yaml dump restore remote -v latest 55 | ``` 56 | 57 | ## Features 58 | 59 | - [x] Support data dump and restore for PostgreSQL, MySQL and MongoDB 60 | - [x] Analyze your data schema 🔎 61 | - [x] Replace sensitive data with fake data 62 | - [x] Works on large database (> 10GB) 63 | - [x] Database Subsetting: Scale down a production database to a more reasonable size 🔥 64 | - [x] Start a local database with the production data in a single command 🔥 65 | - [x] On-the-fly data (de)compression (Zlib) 66 | - [x] On-the-fly data de/encryption (AES-256) 67 | - [x] Fully stateless (no server, no daemon) and lightweight binary 🍃 68 | - [x] Use [custom transformers](examples/wasm) 69 | 70 | Here are the features we plan to support 71 | 72 | - [ ] Auto-detect and version database schema change 73 | - [ ] Auto-detect sensitive fields 74 | - [ ] Auto-clean backed up data 75 | 76 | ## Getting Started 77 | 78 | 1. [How Replibyte works](https://www.replibyte.com/docs/how-replibyte-works) 79 | 2. Initial setup: 80 | 1. [Install](https://www.replibyte.com/docs/getting-started/installation) 81 | 2. 
[Configure](https://www.replibyte.com/docs/getting-started/configuration) 82 | 3. Step-by-step guides: 83 | 1. [Create a dump](https://www.replibyte.com/docs/guides/create-a-dump) 84 | 2. [Restore a dump](https://www.replibyte.com/docs/guides/restore-a-dump) 85 | 3. [Subset a dump](https://www.replibyte.com/docs/guides/subset-a-dump) 86 | 4. [Delete a dump](https://www.replibyte.com/docs/guides/delete-a-dump) 87 | 5. Deploy Replibyte 88 | 1. [Container](https://www.replibyte.com/docs/guides/deploy-replibyte/container) 89 | 2. [Qovery](https://www.replibyte.com/docs/guides/deploy-replibyte/qovery) 90 | 91 | ## Demo 92 | 93 | [![What is RepliByte](assets/video_.png)](https://www.youtube.com/watch?v=IKeLnZvECQw) 94 | 95 | ## Contributing 96 | 97 | Check [here](https://www.replibyte.com/docs/contributing). 98 | 99 | ## Thanks 100 | 101 | Thanks to all people sharing their ideas to make Replibyte better. We do appreciate it. I would also thank [AirByte](https://airbyte.com/), 102 | a great product and a trustworthy source of inspiration for this project. 103 | 104 | --- 105 | 106 | Replibyte is initiated and maintained by [Qovery](https://www.qovery.com?ref=replibyte-readme). 
107 | -------------------------------------------------------------------------------- /replibyte/src/cli.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::{Args, Parser, Subcommand}; 4 | 5 | /// Replibyte is a tool to seed your databases with your production data while keeping sensitive data safe, just pass `-h` 6 | #[derive(Parser, Debug)] 7 | #[clap(version, about, long_about = None)] 8 | #[clap(propagate_version = true)] 9 | pub struct CLI { 10 | /// Replibyte configuration file 11 | #[clap(short, long, parse(from_os_str), value_name = "configuration file")] 12 | pub config: PathBuf, 13 | #[clap(subcommand)] 14 | pub sub_commands: SubCommand, 15 | /// disable telemetry 16 | #[clap(short, long)] 17 | pub no_telemetry: bool, 18 | } 19 | 20 | /// sub commands 21 | #[derive(Subcommand, Debug)] 22 | pub enum SubCommand { 23 | /// all dump commands 24 | #[clap(subcommand)] 25 | Dump(DumpCommand), 26 | /// all source commands 27 | #[clap(subcommand)] 28 | Source(SourceCommand), 29 | /// all transformer commands 30 | #[clap(subcommand)] 31 | Transformer(TransformerCommand), 32 | } 33 | 34 | /// all dump commands 35 | #[derive(Subcommand, Debug)] 36 | pub enum DumpCommand { 37 | /// list available dumps 38 | List, 39 | /// launch dump -- use `-h` to show all the options 40 | Create(DumpCreateArgs), 41 | /// all restore commands 42 | #[clap(subcommand)] 43 | Restore(RestoreCommand), 44 | /// delete a dump from the defined datastore 45 | Delete(DumpDeleteArgs), 46 | } 47 | 48 | /// all transformer commands 49 | #[derive(Subcommand, Debug)] 50 | pub enum TransformerCommand { 51 | /// list available transformers 52 | List, 53 | } 54 | 55 | /// all restore commands 56 | #[derive(Subcommand, Debug)] 57 | pub enum RestoreCommand { 58 | /// Restore dump inside a local Docker container 59 | Local(RestoreLocalArgs), 60 | /// Restore dump inside the configured destination 61 | Remote(RestoreArgs), 62 
}

/// all restore commands
#[derive(Args, Debug)]
pub struct RestoreArgs {
    /// restore dump -- set `latest` or `<dump name>` - use `dump list` command to list all dumps available
    #[clap(short, long, value_name = "[latest | dump name]")]
    pub value: String,
    /// stream output on stdout
    #[clap(short, long)]
    pub output: bool,
}

/// restore dump in a local Docker container
#[derive(Args, Debug)]
pub struct RestoreLocalArgs {
    /// restore dump -- set `latest` or `<dump name>` - use `dump list` command to list all dumps available
    #[clap(short, long, value_name = "[latest | dump name]")]
    pub value: String,
    /// stream output on stdout
    #[clap(short, long)]
    pub output: bool,
    // NOTE(review): the inner types of the `Option` fields below were lost in
    // extraction (angle brackets stripped). Restored from usage: the local
    // Docker destinations (e.g. `MongoDBDocker::new(tag: String, port: u16)`)
    // consume a `String` tag and a `u16` port — confirm against callers.
    /// Docker image tag for the container to spawn
    #[clap(short, long)]
    pub tag: Option<String>,
    /// Docker container port to map on the host
    #[clap(short, long)]
    pub port: Option<u16>,
    /// Remove the Docker container on Ctrl-c
    #[clap(short, long)]
    pub remove: bool,
    /// Docker image type
    #[clap(short, long, value_name = "[postgresql | mysql | mongodb]")]
    pub image: Option<String>,
}

/// all dump run commands
#[derive(Args, Debug)]
pub struct DumpCreateArgs {
    #[clap(name = "source_type", short, long, value_name = "[postgresql | mysql | mongodb]", possible_values = &["postgresql", "mysql", "mongodb"], requires = "input")]
    /// database source type to import
    pub source_type: Option<String>,
    /// import dump from stdin
    #[clap(name = "input", short, long, requires = "source_type")]
    pub input: bool,
    // `parse(from_os_str)` implies a path-valued field, hence `Option<PathBuf>`.
    #[clap(short, long, parse(from_os_str), value_name = "dump file")]
    /// dump file
    pub file: Option<PathBuf>,
    /// dump name
    #[clap(short, long)]
    pub name: Option<String>,
}

#[derive(Args, Debug)]
#[clap(group = clap::ArgGroup::new("delete-mode").multiple(false))]
pub struct DumpDeleteArgs {
    ///
Name of the dump to delete 119 | #[clap(group = "delete-mode")] 120 | pub dump: Option, 121 | /// Remove all dumps older than the specified number of days. Example: `14d` for deleting dumps older than 14 days 122 | #[clap(long, group = "delete-mode")] 123 | pub older_than: Option, 124 | /// Keep only the last N dumps 125 | #[clap(long, group = "delete-mode")] 126 | pub keep_last: Option, 127 | } 128 | 129 | /// all source commands 130 | #[derive(Subcommand, Debug)] 131 | pub enum SourceCommand { 132 | /// Show the database schema. When used with MongoDB, the schema will be probabilistic and returned as a JSON document 133 | Schema, 134 | } 135 | -------------------------------------------------------------------------------- /replibyte/src/destination/docker.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind}; 2 | use std::process::{Child, Command, Stdio}; 3 | use std::thread; 4 | use std::time::Duration; 5 | 6 | pub const DOCKER_BINARY_NAME: &str = "docker"; 7 | 8 | pub struct Image { 9 | pub name: String, 10 | pub tag: String, 11 | } 12 | 13 | pub struct ContainerOptions { 14 | pub host_port: u16, 15 | pub container_port: u16, 16 | } 17 | 18 | pub struct Container { 19 | pub id: String, 20 | } 21 | 22 | impl Container { 23 | pub fn new( 24 | image: &Image, 25 | options: &ContainerOptions, 26 | args: Vec<&str>, 27 | command: Option>, 28 | ) -> Result { 29 | let port_mapping = format!("{}:{}", options.host_port, options.container_port); 30 | let image_version = format!("{}:{}", image.name, image.tag); 31 | let mut run_args = vec!["run", "-p", port_mapping.as_str()]; 32 | 33 | for arg in args { 34 | run_args.push(arg); 35 | } 36 | 37 | run_args.push("-d"); 38 | run_args.push(image_version.as_str()); 39 | 40 | if let Some(command) = command { 41 | for arg in command { 42 | run_args.push(arg); 43 | } 44 | } 45 | 46 | let output = Command::new(DOCKER_BINARY_NAME).args(run_args).output()?; 47 | 48 | // 
FIX: this is a workaround to wait until the container is up 49 | thread::sleep(Duration::from_millis(20_000)); 50 | 51 | match output.status.success() { 52 | true => match String::from_utf8(output.stdout) { 53 | Ok(container_id) => Ok(Container { id: container_id }), 54 | Err(err) => Err(Error::new(ErrorKind::Other, format!("{}", err))), 55 | }, 56 | false => match String::from_utf8(output.stderr) { 57 | Ok(stderr) => Err(Error::new(ErrorKind::Other, format!("{}", stderr))), 58 | Err(err) => Err(Error::new(ErrorKind::Other, format!("{}", err))), 59 | }, 60 | } 61 | } 62 | 63 | pub fn stop(&self) -> Result<(), Error> { 64 | let _process = Command::new(DOCKER_BINARY_NAME) 65 | .args(["stop", &self.id[..12]]) 66 | .stdout(Stdio::null()) 67 | .spawn()?; 68 | 69 | Ok(()) 70 | } 71 | 72 | pub fn rm(&self) -> Result<(), Error> { 73 | let _process = Command::new(DOCKER_BINARY_NAME) 74 | .args(["rm", "-f", &self.id[..12]]) 75 | .stdout(Stdio::null()) 76 | .spawn()?; 77 | 78 | // TODO: should I drop the struct? 
79 | drop(&self); 80 | 81 | Ok(()) 82 | } 83 | 84 | pub fn exec(&self, cmd: &str) -> Result { 85 | Command::new(DOCKER_BINARY_NAME) 86 | .args(["exec", "-i", &self.id[..12], "/bin/bash", "-c", cmd]) 87 | .stdin(Stdio::piped()) 88 | .stdout(Stdio::piped()) 89 | .spawn() 90 | } 91 | } 92 | 93 | /// checks if the `dockerd` daemon runs 94 | pub fn daemon_is_running() -> Result<(), Error> { 95 | let mut process = Command::new(DOCKER_BINARY_NAME) 96 | .args(["ps"]) 97 | .stdout(Stdio::null()) 98 | .spawn()?; 99 | 100 | match process.wait() { 101 | Ok(exit_status) => { 102 | if exit_status.success() { 103 | Ok(()) 104 | } else { 105 | Err(Error::new( 106 | ErrorKind::Other, 107 | format!( 108 | "cannot connect to the Docker daemon: exit_status {}", 109 | exit_status.to_string() 110 | ), 111 | )) 112 | } 113 | } 114 | Err(err) => Err(Error::new( 115 | ErrorKind::Other, 116 | format!("cannot connect to the Docker daemon: {}", err), 117 | )), 118 | } 119 | } 120 | 121 | #[cfg(test)] 122 | mod tests { 123 | use super::{Container, ContainerOptions, Image}; 124 | 125 | #[test] 126 | fn handle_containers() { 127 | let image = Image { 128 | name: "postgres".to_string(), 129 | tag: "13".to_string(), 130 | }; 131 | 132 | let options = ContainerOptions { 133 | host_port: 5433, 134 | container_port: 5432, 135 | }; 136 | 137 | let args = vec![ 138 | "-e", 139 | "POSTGRES_PASSWORD=password", 140 | "-e", 141 | "POSTGRES_USER=root", 142 | ]; 143 | 144 | let container = Container::new(&image, &options, args, None).unwrap(); 145 | 146 | assert!(container.id != "".to_string()); 147 | assert!(container.stop().is_ok()); 148 | assert!(container.rm().is_ok()); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /replibyte/src/transformer/first_name.rs: -------------------------------------------------------------------------------- 1 | use crate::transformer::Transformer; 2 | use crate::types::Column; 3 | use fake::faker::name::raw::FirstName; 
4 | use fake::locales::EN; 5 | use fake::Fake; 6 | 7 | /// This struct is dedicated to replacing string by a first name. 8 | pub struct FirstNameTransformer { 9 | database_name: String, 10 | table_name: String, 11 | column_name: String, 12 | } 13 | 14 | impl FirstNameTransformer { 15 | pub fn new(database_name: S, table_name: S, column_name: S) -> Self 16 | where 17 | S: Into, 18 | { 19 | FirstNameTransformer { 20 | database_name: database_name.into(), 21 | table_name: table_name.into(), 22 | column_name: column_name.into(), 23 | } 24 | } 25 | } 26 | 27 | impl Default for FirstNameTransformer { 28 | fn default() -> Self { 29 | FirstNameTransformer { 30 | database_name: String::default(), 31 | table_name: String::default(), 32 | column_name: String::default(), 33 | } 34 | } 35 | } 36 | 37 | impl Transformer for FirstNameTransformer { 38 | fn id(&self) -> &str { 39 | "first-name" 40 | } 41 | 42 | fn description(&self) -> &str { 43 | "Generate a first name (string only). [Lucas]->[Georges]" 44 | } 45 | 46 | fn database_name(&self) -> &str { 47 | self.database_name.as_str() 48 | } 49 | 50 | fn table_name(&self) -> &str { 51 | self.table_name.as_str() 52 | } 53 | 54 | fn column_name(&self) -> &str { 55 | self.column_name.as_str() 56 | } 57 | 58 | fn transform(&self, column: Column) -> Column { 59 | match column { 60 | Column::NumberValue(column_name, value) => Column::NumberValue(column_name, value), 61 | Column::FloatNumberValue(column_name, value) => { 62 | Column::FloatNumberValue(column_name, value) 63 | } 64 | Column::StringValue(column_name, value) => { 65 | let new_value = if value == "" { 66 | "".to_string() 67 | } else { 68 | FirstName(EN).fake() 69 | }; 70 | 71 | Column::StringValue(column_name, new_value) 72 | } 73 | Column::CharValue(column_name, value) => Column::CharValue(column_name, value), 74 | Column::BooleanValue(column_name, value) => Column::BooleanValue(column_name, value), 75 | Column::None(column_name) => Column::None(column_name), 76 | } 77 | } 
78 | } 79 | 80 | #[cfg(test)] 81 | mod tests { 82 | use crate::{transformer::Transformer, types::Column}; 83 | 84 | use super::FirstNameTransformer; 85 | 86 | #[test] 87 | fn transform_first_name_with_number_value() { 88 | let expected_value = 34; 89 | let transformer = get_transformer(); 90 | let column = Column::NumberValue("first_name".to_string(), expected_value); 91 | let transformed_column = transformer.transform(column); 92 | let transformed_value = transformed_column.number_value().unwrap(); 93 | 94 | assert_eq!(transformed_value.to_owned(), expected_value) 95 | } 96 | 97 | #[test] 98 | fn transform_first_name_with_float_value() { 99 | let expected_value = 1.5; 100 | let transformer = get_transformer(); 101 | let column = Column::FloatNumberValue("first_name".to_string(), expected_value); 102 | let transformed_column = transformer.transform(column); 103 | let transformed_value = transformed_column.float_number_value().unwrap(); 104 | 105 | assert_eq!(transformed_value.to_owned(), expected_value) 106 | } 107 | 108 | #[test] 109 | fn transform_first_name_with_empty_string_value() { 110 | let expected_value = ""; 111 | let transformer = get_transformer(); 112 | let column = Column::StringValue("first_name".to_string(), expected_value.to_string()); 113 | let transformed_column = transformer.transform(column); 114 | let transformed_value = transformed_column.string_value().unwrap(); 115 | 116 | assert_eq!(transformed_value, expected_value) 117 | } 118 | 119 | #[test] 120 | fn transform_first_name_with_string_value() { 121 | let transformer = get_transformer(); 122 | let column = Column::StringValue("first_name".to_string(), "Lucas".to_string()); 123 | let transformed_column = transformer.transform(column); 124 | let transformed_value = transformed_column.string_value().unwrap(); 125 | 126 | assert!(!transformed_value.is_empty()); 127 | assert_ne!(transformed_value, "Lucas".to_string()); 128 | } 129 | 130 | fn get_transformer() -> FirstNameTransformer { 131 | 
FirstNameTransformer::new("github", "users", "first_name") 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /website/docs/datastores.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 6 3 | --- 4 | 5 | # Datastores 6 | 7 | Replibyte supports multiple datastores like AWS S3, GCP Cloud Storage (any other S3 compatible service), and your local disk. 8 | 9 | ## AWS S3 10 | 11 | You can use the default ways of configuring AWS credentials, but if you need to specify customized credentials or a profile 12 | to use you can do so via the datastore configuration. 13 | 14 | ### Generate API Keys 15 | 16 | To generate your API keys: 17 | 18 | ![](/img/datastore/aws/01.png) 19 | 20 | 1. Sign in into your AWS console 21 | 2. Click on **IAM** or search for **IAM** 22 | 3. Select the **Users** page. 23 | 4. Click on **Add users**. 24 | 25 | 5. Create a user with "**Access Key - Programmatic Access**" 26 | ![](/img/datastore/aws/02.png) 27 | 28 | :::caution 29 | 30 | Restrict access as much as possible. Replibyte just need write/read access within a dedicated bucket. 31 | 32 | ::: 33 | 34 | 6. Set the permissions you need for this user. 35 | ![](/img/datastore/aws/03.png) 36 | 37 | Here's an example IAM Policy for a user to be able to read/write to a specific bucket. 38 | ```json 39 | { 40 | "Version": "2012-10-17", 41 | "Statement": [ 42 | { 43 | "Sid": "VisualEditor0", 44 | "Effect": "Allow", 45 | "Action": [ 46 | "s3:Get*", 47 | "s3:List*", 48 | "s3:Put*" 49 | ], 50 | "Resource": [ 51 | "arn:aws:s3:::your-bucket-name-here", 52 | "arn:aws:s3:::your-bucket-name-here/*" 53 | ] 54 | } 55 | ] 56 | } 57 | ``` 58 | 59 | 7. Copy the **Access key** and the **Secret** 60 | 61 | ### Replibyte configuration 62 | 63 | Here is the datastore configuration to use: 64 | 65 | ```yaml 66 | ... 
67 | datastore: 68 | aws: 69 | bucket: 70 | profile: # optional 71 | region: # optional 72 | credentials: # optional 73 | access_key_id: XXX 74 | secret_access_key: XXX 75 | session_token: XXX # optional 76 | ... 77 | ``` 78 | 79 | You can omit the optional properties in which case the default configuration mechanisms will be used, like with the AWS CLI. 80 | 81 | ## GCP Cloud Storage 82 | 83 | ### Generate API Keys 84 | 85 | To generate your API keys compatible with the S3 protocol: 86 | 87 | ![Screenshot to configure GCP cloud storage](/img/datastore/gcp/01.jpg) 88 | 89 | 1. Sign in into your GCP console 90 | 2. Click on the Cloud Storage 91 | 3. Click on the **INTEROPERABILITY** tab 92 | 4. Click on **CREATE A KEY** 93 | 5. Copy the **Access key** and the **Secret** 94 | 95 | ### Create bucket 96 | 97 | Replibyte does not create the bucket automatically for GCP Cloud Storage. You'll need to create it manually. 98 | 99 | :::caution 100 | 101 | Do not turn on object "versioning". 102 | 103 | ::: 104 | 105 | ### Replibyte configuration 106 | 107 | Here is the datastore configuration to use: 108 | 109 | ```yaml 110 | ... 111 | datastore: 112 | gcp: 113 | bucket: your-bucket-name 114 | region: us-central1 115 | access_key: $GS_ACCESS_KEY 116 | secret: $GS_SECRET 117 | ... 118 | ``` 119 | 120 | You can find the GCP Cloud Storage bucket locations [here](https://cloud.google.com/storage/docs/locations). 121 | 122 | ## Other S3 compatible 123 | 124 | Refer to [AWS S3](#aws-s3) for the default S3 wire compatible protocol and the custom endpoint parameter: 125 | 126 | ```yaml 127 | ... 128 | datastore: 129 | aws: 130 | bucket: 131 | region: 132 | credentials: 133 | access_key_id: XXX 134 | secret_access_key: XXX 135 | endpoint: 136 | custom: 'https://your-s3-compatible-endpoint' 137 | ... 138 | ``` 139 | 140 | `access_key_id` and `secret_access_key` must be valid hash-based message authentication code (HMAC) keys. Refer to the service to use to get those keys. 
141 | 142 | ## Local disk 143 | 144 | ### Create a directory 145 | 146 | Replibyte does not create the directory automatically for you. You'll need to create it manually. 147 | 148 | For example, you can create a directory by running: 149 | 150 | ```sh 151 | mkdir /data/replibyte 152 | ``` 153 | 154 | ### Replibyte configuration 155 | 156 | Here is the datastore configuration to use: 157 | 158 | ```yaml 159 | ... 160 | datastore: 161 | local_disk: 162 | dir: 163 | ... 164 | ``` 165 | 166 | `dir` must be a readable and writable directory to the user running `replibyte`. 167 | 168 | So, to use our previously created `/data/replibyte` directory, the datastore config must be: 169 | 170 | ```yaml 171 | ... 172 | datastore: 173 | local_disk: 174 | dir: /data/replibyte 175 | ... 176 | ``` 177 | 178 | ## Add another datastore 179 | 180 | Do you need another datastore? Replibyte is extensible and any datastore can be supported. You are free to contribute by opening an issue or/and a pull request. 181 | 182 | To contribute, please see the [contributing](/docs/contributing) page. 
183 | -------------------------------------------------------------------------------- /replibyte/src/destination/mongodb_docker.rs: -------------------------------------------------------------------------------- 1 | use crate::connector::Connector; 2 | use crate::destination::docker::{ 3 | daemon_is_running, Container, ContainerOptions, Image, DOCKER_BINARY_NAME, 4 | }; 5 | use crate::destination::Destination; 6 | use crate::types::Bytes; 7 | use crate::utils::binary_exists; 8 | use std::io::{Error, ErrorKind, Write}; 9 | 10 | const DEFAULT_MONGO_IMAGE: &str = "mongo"; 11 | pub const DEFAULT_MONGO_IMAGE_TAG: &str = "5"; 12 | pub const DEFAULT_MONGO_CONTAINER_PORT: u16 = 27017; 13 | const DEFAULT_MONGO_USER: &str = "root"; 14 | const DEFAULT_MONGO_PASSWORD: &str = "password"; 15 | 16 | pub struct MongoDBDocker { 17 | pub image: Image, 18 | pub options: ContainerOptions, 19 | pub container: Option, 20 | } 21 | 22 | impl MongoDBDocker { 23 | pub fn new(tag: String, port: u16) -> Self { 24 | Self { 25 | image: Image { 26 | name: DEFAULT_MONGO_IMAGE.to_string(), 27 | tag, 28 | }, 29 | options: ContainerOptions { 30 | host_port: port, 31 | container_port: DEFAULT_MONGO_CONTAINER_PORT, 32 | }, 33 | container: None, 34 | } 35 | } 36 | } 37 | 38 | impl Connector for MongoDBDocker { 39 | fn init(&mut self) -> Result<(), Error> { 40 | let _ = binary_exists(DOCKER_BINARY_NAME)?; 41 | let _ = daemon_is_running()?; 42 | 43 | let password_env = format!("MONGO_INITDB_ROOT_USERNAME={}", DEFAULT_MONGO_USER); 44 | let user_env = format!("MONGO_INITDB_ROOT_PASSWORD={}", DEFAULT_MONGO_PASSWORD); 45 | let container = Container::new( 46 | &self.image, 47 | &self.options, 48 | vec!["-e", password_env.as_str(), "-e", user_env.as_str()], 49 | None, 50 | )?; 51 | 52 | self.container = Some(container); 53 | Ok(()) 54 | } 55 | } 56 | 57 | impl Destination for MongoDBDocker { 58 | fn write(&self, data: Bytes) -> Result<(), Error> { 59 | let cmd = format!( 60 | "mongorestore 
--authenticationDatabase admin -u {} -p {} --archive", 61 | DEFAULT_MONGO_USER, DEFAULT_MONGO_PASSWORD, 62 | ); 63 | 64 | match &self.container { 65 | Some(container) => { 66 | let mut container_exec = container.exec(&cmd)?; 67 | let _ = container_exec 68 | .stdin 69 | .take() 70 | .unwrap() 71 | .write_all(&data[..data.len() - 1]); // remove trailing null terminator, or else mongorestore will fail 72 | 73 | let exit_status = container_exec.wait()?; 74 | if !exit_status.success() { 75 | return Err(Error::new( 76 | ErrorKind::Other, 77 | format!("command error: {:?}", exit_status.to_string()), 78 | )); 79 | } 80 | 81 | Ok(()) 82 | } 83 | None => Err(Error::new( 84 | ErrorKind::Other, 85 | "command error: cannot retrieve container", 86 | )), 87 | } 88 | } 89 | } 90 | 91 | #[cfg(test)] 92 | mod tests { 93 | use dump_parser::utils::decode_hex; 94 | 95 | use crate::connector::Connector; 96 | use crate::destination::mongodb_docker::MongoDBDocker; 97 | use crate::destination::Destination; 98 | 99 | fn get_mongodb() -> MongoDBDocker { 100 | MongoDBDocker::new("5".to_string(), 27021) 101 | } 102 | 103 | fn get_invalid_mongodb() -> MongoDBDocker { 104 | MongoDBDocker::new("bad_tag".to_string(), 27021) 105 | } 106 | 107 | #[test] 108 | fn connect() { 109 | let mut p = get_mongodb(); 110 | let _ = p.init().expect("can't init mongodb"); 111 | let bytes = 
decode_hex("6de299816600000010636f6e63757272656e745f636f6c6c656374696f6e7300040000000276657273696f6e0004000000302e3100027365727665725f76657273696f6e0006000000352e302e360002746f6f6c5f76657273696f6e00080000003130302e352e320000020100000264620005000000746573740002636f6c6c656374696f6e0006000000757365727300026d6574616461746100ad0000007b22696e6465786573223a5b7b2276223a7b22246e756d626572496e74223a2232227d2c226b6579223a7b225f6964223a7b22246e756d626572496e74223a2231227d7d2c226e616d65223a225f69645f227d5d2c2275756964223a223464363734323637316333613463663938316439386164373831343735333234222c22636f6c6c656374696f6e4e616d65223a227573657273222c2274797065223a22636f6c6c656374696f6e227d001073697a6500000000000274797065000b000000636f6c6c656374696f6e0000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600001243524300000000000000000000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600011243524300000000000000000000ffffffff00").unwrap(); 112 | assert!(p.write(bytes.to_vec()).is_ok()); 113 | 114 | // cleanup container 115 | let _ = p.container.unwrap().rm(); 116 | 117 | let mut p = get_invalid_mongodb(); 118 | assert!(p.init().is_err()); 119 | assert!(p.write(bytes.to_vec()).is_err()); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /website/docs/guides/deploy-replibyte/container.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Container 3 | sidebar_position: 1 4 | --- 5 | 6 | # Deploy Replibyte as a container 7 | 8 | You are using Replibyte on your local machine to [create](/docs/guides/create-a-dump) and [restore dumps](/docs/guides/restore-a-dump), it's great, but now you might want to deploy it close to your production and development environments to automate the process. This step-by-step guide explains how to do it and share you best practices. 
 9 | 10 | :::note for qovery users 11 | 12 | To deploy with [Qovery](https://www.qovery.com), follow [this guide](/docs/guides/deploy-replibyte/qovery) 13 | 14 | ::: 15 | 16 | Here is a schema of what we are going to put in place. 17 | 18 | ![schema Replibyte](/img/replibyte_dump_and_restore.jpg) 19 | 20 | 1. In production: 21 | 1. Replibyte periodically dumps the production database and… 22 | 2. uploads a dump **without the sensitive data** to an S3 bucket. 23 | 2. In development: 24 | 1. Replibyte periodically restores the development database with the latest dump. 25 | 26 | Let's go! 27 | 28 | ## Run Replibyte container locally 29 | 30 | ### Download the official Replibyte image 31 | 32 | ```sh 33 | docker pull ghcr.io/qovery/replibyte 34 | ``` 35 | 36 | Check the [Github package](https://github.com/qovery/replibyte/pkgs/container/replibyte) for available tags (currently `latest` and per git release tag). 37 | 38 | 39 | 40 | ### Create Replibyte configuration file 41 | 42 | I will take our final `conf.yaml` file from the ["create a dump"](/docs/guides/create-a-dump) guide and rename it to `replibyte.yaml`. 43 | 44 | :::caution 45 | 46 | You must name your replibyte configuration file `replibyte.yaml`. Otherwise, it will not work.
47 | 48 | ::: 49 | 50 | ```yaml title="replibyte.yaml" 51 | encryption_key: $ENCRYPTION_SECRET # put a secure secret here 52 | source: 53 | connection_uri: $SOURCE_CONNECTION_URI 54 | transformers: 55 | - database: public 56 | table: customers 57 | columns: 58 | - name: first_name 59 | transformer_name: first-name 60 | - name: last_name 61 | transformer_name: random 62 | - name: contact_phone 63 | transformer_name: phone-number 64 | - name: contact_email 65 | transformer_name: email 66 | datastore: 67 | aws: 68 | bucket: $S3_BUCKET 69 | region: $S3_REGION 70 | access_key_id: $S3_ACCESS_KEY_ID 71 | secret_access_key: $S3_SECRET_ACCESS_KEY 72 | destination: 73 | connection_uri: $DESTINATION_CONNECTION_URI 74 | ``` 75 | 76 | And set your environment variables in a file. You can leave secure environment variables empty so that they read from the shell environment. 77 | 78 | ```sh 79 | $ cat env.txt 80 | S3_ACCESS_KEY_ID 81 | S3_SECRET_ACCESS_KEY 82 | S3_REGION=us-east-2 83 | S3_BUCKET=my-test-bucket 84 | SOURCE_CONNECTION_URI=postgres://... 85 | DESTINATION_CONNECTION_URI=postgres://... 86 | ENCRYPTION_SECRET 87 | ``` 88 | 89 | ### Start the container 90 | 91 | ```sh 92 | docker run -it --name replibyte \ 93 | --env-file env.txt \ 94 | -v "$(pwd)/replibyte.yaml":/replibyte.yaml:ro \ 95 | ghcr.io/qovery/replibyte \ 96 | ``` 97 | 98 | ## Running in a cloud environment 99 | 100 | ### Deploy with Qovery 101 | 102 | --- 103 | 104 | :::info 105 | 106 | [Qovery](https://www.qovery.com) (the company behind Replibyte) is a platform used by more than 20 000 developers to deploy their apps on AWS in just a few seconds. Replibyte will be natively supported by Qovery in Q4 2022. 107 | 108 | ::: 109 | 110 | To deploy Replibyte with Qovery - [here are the instructions](/docs/guides/deploy-replibyte/qovery). 111 | 112 | --- 113 | 114 | ### Self-hosted Deployment 115 | 116 | This part depends on the platform (E.g Kubernetes, Docker Swarm, Nomad...) you use to deploy your containers. 
Basically, you just need to pull the container and run it with the right parameters. 117 | 118 | ### Parameters for production 119 | 120 | Here is the command line to dump the production 121 | 122 | ```bash 123 | docker run -e S3_ACCESS_KEY_ID=XXX \ 124 | -e S3_SECRET_ACCESS_KEY=YYY \ 125 | -e S3_REGION=us-east-2 \ 126 | -e S3_BUCKET=my-test-bucket \ 127 | -e SOURCE_CONNECTION_URI=postgres://... \ 128 | -e DESTINATION_CONNECTION_URI=postgres://... \ 129 | -e ENCRYPTION_SECRET=itIsASecret \ 130 | ghcr.io/qovery/replibyte replibyte dump create 131 | ``` 132 | 133 | ### Parameters to seed development databases 134 | 135 | Here is the command line to seed your development database with the latest production dump 136 | 137 | ```bash 138 | docker run -e S3_ACCESS_KEY_ID=XXX \ 139 | -e S3_SECRET_ACCESS_KEY=YYY \ 140 | -e S3_REGION=us-east-2 \ 141 | -e S3_BUCKET=my-test-bucket \ 142 | -e SOURCE_CONNECTION_URI=postgres://... \ 143 | -e DESTINATION_CONNECTION_URI=postgres://... \ 144 | -e ENCRYPTION_SECRET=itIsASecret \ 145 | ghcr.io/qovery/replibyte replibyte dump restore remote -v latest 146 | ``` 147 | 148 | --- 149 | 150 | Do you have any questions? Feel free to join the channel #replibyte on [our Discord server](https://discord.qovery.com). 
151 | -------------------- replibyte/src/transformer/keep_first_char.rs --------------------
use crate::transformer::Transformer;
use crate::types::Column;

/// Transformer that keeps only the first character of a string column, or the
/// most significant digit of a numeric column.
pub struct KeepFirstCharTransformer {
    database_name: String,
    table_name: String,
    column_name: String,
}

impl KeepFirstCharTransformer {
    /// Build a transformer targeting `database.table.column`.
    // NOTE(review): the generic parameter list was stripped during extraction;
    // restored as `new<S> where S: Into<String>` from the `.into()` calls.
    pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
    where
        S: Into<String>,
    {
        KeepFirstCharTransformer {
            database_name: database_name.into(),
            table_name: table_name.into(),
            column_name: column_name.into(),
        }
    }
}

impl Default for KeepFirstCharTransformer {
    fn default() -> Self {
        KeepFirstCharTransformer {
            database_name: String::default(),
            table_name: String::default(),
            column_name: String::default(),
        }
    }
}

impl Transformer for KeepFirstCharTransformer {
    fn id(&self) -> &str {
        "keep-first-char"
    }

    fn description(&self) -> &str {
        "Keep only the first character of the column."
    }

    fn database_name(&self) -> &str {
        self.database_name.as_str()
    }

    fn table_name(&self) -> &str {
        self.table_name.as_str()
    }

    fn column_name(&self) -> &str {
        self.column_name.as_str()
    }

    fn database_and_table_and_column_name(&self) -> String {
        format!(
            "{}.{}.{}",
            self.database_name(),
            self.table_name(),
            self.column_name()
        )
    }

    /// Numbers keep their most significant digit, strings keep their first
    /// character; all other column types pass through unchanged.
    fn transform(&self, column: Column) -> Column {
        match column {
            Column::NumberValue(column_name, value) => {
                Column::NumberValue(column_name, get_first_digit(value))
            }
            Column::StringValue(column_name, value) => {
                // `chars().next()` is Unicode-aware: a multi-byte first char
                // is kept whole, and an empty string stays empty.
                let new_value = match value.chars().next() {
                    Some(first_char) => first_char.to_string(),
                    None => value,
                };

                Column::StringValue(column_name, new_value)
            }
            column => column,
        }
    }
}

/// Reduce `number` to its most significant digit, preserving its sign.
fn get_first_digit(mut number: i128) -> i128 {
    // Fix: the previous `while number >= 10` guard never fired for negative
    // inputs, so values like -123 were returned unchanged — leaking the
    // original data this transformer is supposed to mask. Reduce magnitude
    // for both signs instead.
    while number >= 10 || number <= -10 {
        number /= 10;
    }

    number
}

#[cfg(test)]
mod tests {
    use crate::{transformer::Transformer, types::Column};

    use super::KeepFirstCharTransformer;

    #[test]
    fn transform_keep_first_char_only_with_number_value() {
        let transformer = get_transformer();
        let column = Column::NumberValue("a_column".to_string(), 123);
        let transformed_column = transformer.transform(column);
        let transformed_value = transformed_column.number_value().unwrap();
        assert_eq!(transformed_value.to_owned(), 1);

        let transformer = get_transformer();
        let column = Column::NumberValue("a_column".to_string(), 1);
        let transformed_column = transformer.transform(column);
        let transformed_value = transformed_column.number_value().unwrap();
        assert_eq!(transformed_value.to_owned(), 1);
    }

    #[test]
117 | fn transform_doesnt_change_with_float_value() { 118 | let expected_value = 1.5; 119 | let transformer = get_transformer(); 120 | let column = Column::FloatNumberValue("a_column".to_string(), expected_value); 121 | let transformed_column = transformer.transform(column); 122 | let transformed_value = transformed_column.float_number_value().unwrap(); 123 | 124 | assert_eq!(transformed_value.to_owned(), expected_value); 125 | } 126 | 127 | #[test] 128 | fn transform_doesnt_change_with_empty_string_value() { 129 | let expected_value = ""; 130 | let transformer = get_transformer(); 131 | let column = Column::StringValue("a_column".to_string(), expected_value.to_string()); 132 | let transformed_column = transformer.transform(column); 133 | let transformed_value = transformed_column.string_value().unwrap(); 134 | assert_eq!(transformed_value, expected_value); 135 | } 136 | 137 | #[test] 138 | fn transform_keep_only_first_char_with_string_value() { 139 | let transformer = get_transformer(); 140 | let column = Column::StringValue("a_column".to_string(), "Lucas".to_string()); 141 | let transformed_column = transformer.transform(column); 142 | let transformed_value = transformed_column.string_value().unwrap(); 143 | assert_eq!(transformed_value, "L".to_string()); 144 | 145 | let column = Column::StringValue("a_column".to_string(), "L".to_string()); 146 | let transformed_column = transformer.transform(column); 147 | let transformed_value = transformed_column.string_value().unwrap(); 148 | assert_eq!(transformed_value, "L".to_string()); 149 | } 150 | 151 | fn get_transformer() -> KeepFirstCharTransformer { 152 | KeepFirstCharTransformer::new("github", "users", "a_column") 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /.github/workflows/on-release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: 4 | - published 5 | 6 | jobs: 7 | build-linux: 8 | 
runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v1 13 | 14 | - name: Install latest rust toolchain 15 | uses: actions-rs/toolchain@v1 16 | with: 17 | toolchain: stable 18 | default: true 19 | override: true 20 | 21 | - name: Build for linux 22 | run: | 23 | docker run --rm \ 24 | --volume "${PWD}":/root/src \ 25 | --workdir /root/src \ 26 | joseluisq/rust-linux-darwin-builder:1.60.0 \ 27 | sh -c "cargo build --release" 28 | 29 | - name: Prepare release 30 | run: | 31 | cd target/x86_64-unknown-linux-musl/release 32 | EVENT_DATA=$(cat "$GITHUB_EVENT_PATH") 33 | RELEASE_NAME=$(echo "$EVENT_DATA" | jq -r .release.tag_name) 34 | FILE=replibyte_${RELEASE_NAME}_x86_64-unknown-linux-musl 35 | sudo tar -czvf ${FILE}.tar.gz replibyte && sudo rm replibyte 36 | sudo touch ${FILE}.tar.gz.sha256sum && sudo chmod 777 ${FILE}.tar.gz.sha256sum 37 | sudo sha256sum "${FILE}.tar.gz" | cut -d ' ' -f 1 > ${FILE}.tar.gz.sha256sum 38 | 39 | - name: Release 40 | uses: softprops/action-gh-release@v1 41 | with: 42 | files: | 43 | target/x86_64-unknown-linux-musl/release/replibyte_* 44 | env: 45 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | build-windows: 48 | runs-on: ubuntu-latest 49 | 50 | steps: 51 | - name: Checkout 52 | uses: actions/checkout@v1 53 | 54 | - name: Install latest rust toolchain 55 | uses: actions-rs/toolchain@v1 56 | with: 57 | toolchain: stable 58 | default: true 59 | target: x86_64-pc-windows-gnu 60 | override: true 61 | 62 | - name: Build for windows 63 | run: | 64 | sudo apt-get update && sudo apt-get upgrade -y 65 | sudo apt-get install -y g++-mingw-w64-x86-64 66 | cargo build --all --release --target x86_64-pc-windows-gnu 67 | 68 | - name: Prepare release 69 | run: | 70 | cd target/x86_64-pc-windows-gnu/release 71 | EVENT_DATA=$(cat "$GITHUB_EVENT_PATH") 72 | RELEASE_NAME=$(echo "$EVENT_DATA" | jq -r .release.tag_name) 73 | FILE=replibyte_${RELEASE_NAME}_x86_64-pc-windows-gnu.exe 74 | sudo zip -9r ${FILE}.zip 
replibyte.exe && sudo rm replibyte.exe 75 | sudo touch ${FILE}.zip.sha256sum && sudo chmod 777 ${FILE}.zip.sha256sum 76 | sudo sha256sum "${FILE}.zip" | cut -d ' ' -f 1 > ${FILE}.zip.sha256sum 77 | 78 | - name: Release 79 | uses: softprops/action-gh-release@v1 80 | with: 81 | files: target/x86_64-pc-windows-gnu/release/replibyte_* 82 | env: 83 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 84 | 85 | build-mac: 86 | runs-on: ubuntu-latest 87 | outputs: 88 | sha256sum: ${{ steps.prep.outputs.sha256sum }} 89 | steps: 90 | - name: Checkout 91 | uses: actions/checkout@v1 92 | 93 | - name: Install latest rust toolchain 94 | uses: actions-rs/toolchain@v1 95 | with: 96 | toolchain: stable 97 | target: x86_64-apple-darwin 98 | default: true 99 | override: true 100 | 101 | - name: Build for mac 102 | run: | 103 | docker run --rm \ 104 | --volume "${PWD}":/root/src \ 105 | --workdir /root/src \ 106 | joseluisq/rust-linux-darwin-builder:1.60.0 \ 107 | sh -c "CC=o64-clang CXX=o64-clang++ cargo build --release --target x86_64-apple-darwin" 108 | 109 | - id: prep 110 | name: Prepare release 111 | run: | 112 | cd target/x86_64-apple-darwin/release 113 | EVENT_DATA=$(cat "$GITHUB_EVENT_PATH") 114 | RELEASE_NAME=$(echo "$EVENT_DATA" | jq -r .release.tag_name) 115 | FILE=replibyte_${RELEASE_NAME}_x86_64-apple-darwin 116 | sudo zip -9r ${FILE}.zip replibyte && sudo rm replibyte 117 | sudo touch ${FILE}.zip.sha256sum && sudo chmod 777 ${FILE}.zip.sha256sum 118 | CHECKSUM=$(sudo sha256sum "${FILE}.zip" | cut -d ' ' -f 1) 119 | echo "${CHECKSUM}" > ${FILE}.zip.sha256sum 120 | echo "sha256sum=${CHECKSUM}" >> "$GITHUB_OUTPUT" # '::set-output' is deprecated by GitHub Actions 121 | 122 | - name: Release 123 | uses: softprops/action-gh-release@v1 124 | with: 125 | files: | 126 | target/x86_64-apple-darwin/release/replibyte_* 127 | env: 128 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 129 | 130 | publish-on-homebrew: 131 | runs-on: ubuntu-latest 132 | needs: build-mac 133 | steps: 134 | - name: Extract version and sha256sum 135 | id: 
extract 136 | run: | 137 | echo "tag-name=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT" # '::set-output' is deprecated by GitHub Actions 138 | echo "sha256sum=${{ needs.build-mac.outputs.sha256sum }}" >> "$GITHUB_OUTPUT" 139 | - uses: mislav/bump-homebrew-formula-action@v2 140 | if: "!contains(github.ref, '-')" # skip prereleases 141 | with: 142 | formula-name: replibyte 143 | homebrew-tap: Qovery/homebrew-replibyte 144 | download-url: https://github.com/Qovery/replibyte/releases/download/${{ steps.extract.outputs.tag-name }}/replibyte_${{ steps.extract.outputs.tag-name }}_x86_64-apple-darwin.zip 145 | download-sha256: ${{ steps.extract.outputs.sha256sum }} 146 | env: 147 | COMMITTER_TOKEN: ${{ secrets.PERSONAL_TOKEN }} 148 | -------------------------------------------------------------------------------- /replibyte/src/transformer/redacted.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | use crate::transformer::Transformer; 4 | use crate::types::Column; 5 | 6 | /// This struct is dedicated to redact a string with a specific character (default to '*').
7 | pub struct RedactedTransformer { 8 | database_name: String, 9 | table_name: String, 10 | column_name: String, 11 | options: RedactedTransformerOptions, 12 | } 13 | 14 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)] 15 | pub struct RedactedTransformerOptions { 16 | pub character: char, 17 | pub width: u8, 18 | } 19 | 20 | impl Default for RedactedTransformerOptions { 21 | fn default() -> Self { 22 | RedactedTransformerOptions { 23 | character: '*', 24 | width: 10, 25 | } 26 | } 27 | } 28 | 29 | impl RedactedTransformer { 30 | pub fn new<S>( 31 | database_name: S, 32 | table_name: S, 33 | column_name: S, 34 | options: RedactedTransformerOptions, 35 | ) -> Self 36 | where 37 | S: Into<String>, 38 | { 39 | RedactedTransformer { 40 | database_name: database_name.into(), 41 | table_name: table_name.into(), 42 | column_name: column_name.into(), 43 | options, 44 | } 45 | } 46 | } 47 | 48 | impl Default for RedactedTransformer { 49 | fn default() -> Self { 50 | RedactedTransformer { 51 | database_name: String::default(), 52 | table_name: String::default(), 53 | column_name: String::default(), 54 | options: RedactedTransformerOptions::default(), 55 | } 56 | } 57 | } 58 | 59 | impl Transformer for RedactedTransformer { 60 | fn id(&self) -> &str { 61 | "redacted" 62 | } 63 | 64 | fn description(&self) -> &str { 65 | "Obfuscate your sensitive data (string only).
[4242 4242 4242 4242]->[424****************]" 66 | } 67 | 68 | fn database_name(&self) -> &str { 69 | self.database_name.as_str() 70 | } 71 | 72 | fn table_name(&self) -> &str { 73 | self.table_name.as_str() 74 | } 75 | 76 | fn column_name(&self) -> &str { 77 | self.column_name.as_str() 78 | } 79 | 80 | fn transform(&self, column: Column) -> Column { 81 | match column { 82 | Column::StringValue(column_name, value) => { 83 | let new_value = match value.len() { 84 | len if len > 3 => { 85 | format!( 86 | "{}{}", 87 | value.chars().take(3).collect::<String>(), 88 | self.options 89 | .character 90 | .to_string() 91 | .repeat(self.options.width.into()) 92 | ) 93 | } 94 | _ => value, 95 | }; 96 | Column::StringValue(column_name, new_value) 97 | } 98 | column => column, 99 | } 100 | } 101 | } 102 | 103 | #[cfg(test)] 104 | mod tests { 105 | use crate::{transformer::Transformer, types::Column}; 106 | 107 | use super::{RedactedTransformer, RedactedTransformerOptions}; 108 | 109 | #[test] 110 | fn redact() { 111 | let transformer = get_transformer(); 112 | let column = Column::StringValue( 113 | "credit_card_number".to_string(), 114 | "4242 4242 4242 4242".to_string(), 115 | ); 116 | let transformed_column = transformer.transform(column); 117 | let transformed_value = transformed_column.string_value().unwrap(); 118 | assert_eq!(transformed_value.to_owned(), "424**********") 119 | } 120 | 121 | #[test] 122 | fn redact_with_multi_byte_char() { 123 | let transformer = get_transformer(); 124 | let column = Column::StringValue( 125 | "multi_byte_column".to_string(), 126 | "🦀ë池cd".to_string(), 127 | ); 128 | let transformed_column = transformer.transform(column); 129 | let transformed_value = transformed_column.string_value().unwrap(); 130 | assert_eq!(transformed_value.to_owned(), "🦀ë池**********") 131 | } 132 | 133 | #[test] 134 | fn strings_lower_than_3_chars_remains_visible() { 135 | let transformer = get_transformer(); 136 | let column =
Column::StringValue("credit_card_number".to_string(), "424".to_string()); 137 | let transformed_column = transformer.transform(column); 138 | let transformed_value = transformed_column.string_value().unwrap(); 139 | assert_eq!(transformed_value.to_owned(), "424") 140 | } 141 | 142 | #[test] 143 | fn redact_with_custom_char() { 144 | let transformer = RedactedTransformer::new( 145 | "github", 146 | "users", 147 | "credit_card_number", 148 | RedactedTransformerOptions { 149 | character: '#', 150 | width: 20, 151 | }, 152 | ); 153 | let column = Column::StringValue( 154 | "credit_card_number".to_string(), 155 | "4242 4242 4242 4242".to_string(), 156 | ); 157 | let transformed_column = transformer.transform(column); 158 | let transformed_value = transformed_column.string_value().unwrap(); 159 | assert_eq!(transformed_value.to_owned(), "424####################") 160 | } 161 | 162 | fn get_transformer() -> RedactedTransformer { 163 | RedactedTransformer::new( 164 | "github", 165 | "users", 166 | "credit_card_number", 167 | RedactedTransformerOptions::default(), 168 | ) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /website/docs/getting-started/configuration.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 4 3 | --- 4 | 5 | # Configuration 6 | 7 | Create your `conf.yaml` configuration file to source your production database. 
8 | 9 | ```yaml 10 | encryption_key: $MY_PRIVATE_ENC_KEY # optional - encrypt data on datastore 11 | source: 12 | connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL 13 | datastore: 14 | aws: 15 | bucket: $BUCKET_NAME 16 | region: $S3_REGION 17 | credentials: 18 | access_key_id: $ACCESS_KEY_ID 19 | secret_access_key: $AWS_SECRET_ACCESS_KEY 20 | destination: 21 | connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL 22 | ``` 23 | 24 | :::info 25 | 26 | Environment variables are substituted by their value at runtime. An error is thrown if the environment variable does not exist. 27 | 28 | ::: 29 | 30 | Run the app for the source: 31 | 32 | ```shell 33 | replibyte -c conf.yaml 34 | ``` 35 | 36 | ## Source and Destination 37 | 38 | Replibyte supports multiple databases. 39 | 40 | - [PostgreSQL](/docs/databases#postgresql) 41 | - [MySQL](/docs/databases#mysql) 42 | - [MongoDB](/docs/databases#mongodb) 43 | 44 | ## Transformer 45 | 46 | A transformer is useful to change/hide the value of a specified column. Replibyte provides pre-made transformers. You can 47 | also [build your own Transformer in web assembly](/docs/transformers#wasm). 48 | 49 | Here is a list of all the [transformers available](/docs/transformers). 50 | 51 | | id | description | doc | 52 | |-----------------|----------------------------------------------------------------------------------------------------|-------------------------------------------------| 53 | | transient | Does not modify the value | [link](/docs/transformers#transient) | 54 | | random | Randomize value but keep the same length (string only). 
[AAA]->[BBB] | [link](/docs/transformers#random) | 55 | | first-name | Replace the string value by a first name | [link](/docs/transformers#first-name) | 56 | | email | Replace the string value by an email address | [link](/docs/transformers#email) | 57 | | keep-first-char | Keep only the first char for strings and digit for numbers | [link](/docs/transformers#keep-first-character) | 58 | | phone-number | Replace the string value by a phone number | [link](/docs/transformers#phone-number) | 59 | | credit-card | Replace the string value by a credit card number | [link](/docs/transformers#credit-card) | 60 | | redacted | Obfuscate your sensitive data (>3 characters strings only). [4242 4242 4242 4242]->[424**********] | [link](/docs/transformers#redacted) | 61 | 62 | ## Datastore 63 | 64 | A Datastore is where Replibyte stores the created dumps to make them accessible from the destination databases. 65 | 66 | | Cloud Service Provider | S3 service name | S3 compatible | 67 | |------------------------|---------------------------------------------------------------------------|----------------| 68 | | Amazon Web Services | [S3](https://aws.amazon.com/s3/) | Yes (Original) | 69 | | Google Cloud Platform | [Cloud Storage](https://cloud.google.com/storage) | Yes | 70 | | Microsoft Azure | [Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) | Yes | 71 | | Digital Ocean | [Spaces](https://www.digitalocean.com/products/spaces) | Yes | 72 | | Scaleway | [Object Storage](https://www.scaleway.com/en/object-storage/) | Yes | 73 | | Minio | [Object Storage](https://min.io/) | Yes | 74 | 75 | :::info 76 | 77 | Any datastore compatible with the S3 protocol is a valid datastore. 78 | 79 | ::: 80 | 81 | ## Example 82 | 83 | Here is a configuration file including some transformations and different options like the database subset.
84 | 85 | ```yaml 86 | encryption_key: $MY_PRIVATE_ENC_KEY # optional - encrypt data on datastore 87 | source: 88 | connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL 89 | database_subset: # optional - downscale database while keeping it consistent 90 | database: public 91 | table: orders 92 | strategy_name: random 93 | strategy_options: 94 | percent: 50 95 | passthrough_tables: 96 | - us_states 97 | transformers: # optional - hide sensitive data 98 | - database: public 99 | table: employees 100 | columns: 101 | - name: last_name 102 | transformer_name: random 103 | - name: birth_date 104 | transformer_name: random-date 105 | - name: first_name 106 | transformer_name: first-name 107 | - name: email 108 | transformer_name: email 109 | - name: username 110 | transformer_name: keep-first-char 111 | - database: public 112 | table: customers 113 | columns: 114 | - name: phone 115 | transformer_name: phone-number 116 | only_tables: # optional - dumps only specified tables. 117 | - database: public 118 | table: orders 119 | - database: public 120 | table: customers 121 | datastore: 122 | aws: 123 | bucket: $BUCKET_NAME 124 | region: $S3_REGION 125 | credentials: 126 | access_key_id: $ACCESS_KEY_ID 127 | secret_access_key: $AWS_SECRET_ACCESS_KEY 128 | destination: 129 | connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL 130 | ``` 131 | --------------------------------------------------------------------------------