├── website
    ├── static
    │   ├── .nojekyll
    │   └── img
    │   │   ├── favicon.ico
    │   │   ├── datastore
    │   │       ├── aws
    │   │       │   ├── 01.png
    │   │       │   ├── 02.png
    │   │       │   └── 03.png
    │   │       └── gcp
    │   │       │   └── 01.jpg
    │   │   ├── tutorial
    │   │       ├── localeDropdown.png
    │   │       └── docsVersionDropdown.png
    │   │   └── replibyte_dump_and_restore.jpg
    ├── docs
    │   ├── design
    │   │   ├── _category_.json
    │   │   └── how-database-subset-works.md
    │   ├── guides
    │   │   ├── _category_.json
    │   │   ├── deploy-replibyte
    │   │   │   ├── _category_.json
    │   │   │   ├── qovery.md
    │   │   │   └── container.md
    │   │   ├── 4-delete-a-dump.md
    │   │   ├── 3-subset-a-dump.md
    │   │   └── 2-restore-a-dump.md
    │   ├── advanced-guides
    │   │   ├── _category_.json
    │   │   └── web-assembly-transformer.md
    │   ├── getting-started
    │   │   ├── _category_.json
    │   │   ├── concepts.md
    │   │   ├── installation.mdx
    │   │   └── configuration.md
    │   ├── contributing.md
    │   ├── faq.md
    │   ├── databases.mdx
    │   ├── how-replibyte-works.md
    │   ├── introduction.mdx
    │   └── datastores.mdx
    ├── babel.config.js
    ├── tsconfig.json
    ├── src
    │   ├── components
    │   │   └── HomepageFeatures
    │   │   │   ├── styles.module.css
    │   │   │   └── index.tsx
    │   ├── pages
    │   │   ├── index.module.css
    │   │   └── index.tsx
    │   └── css
    │   │   └── custom.css
    ├── .gitignore
    ├── sidebars.js
    ├── README.md
    ├── package.json
    └── docusaurus.config.js
├── docker
    └── exec.sh
├── .dockerignore
├── assets
    ├── video_.png
    └── RepliByte Logo.png
├── replibyte
    ├── src
    │   ├── commands
    │   │   ├── mod.rs
    │   │   ├── transformer.rs
    │   │   └── source.rs
    │   ├── connector.rs
    │   ├── destination
    │   │   ├── mod.rs
    │   │   ├── generic_stdout.rs
    │   │   ├── mysql.rs
    │   │   ├── mysql_docker.rs
    │   │   ├── mongodb.rs
    │   │   ├── postgres_docker.rs
    │   │   ├── postgres.rs
    │   │   ├── docker.rs
    │   │   └── mongodb_docker.rs
    │   ├── tasks
    │   │   ├── mod.rs
    │   │   ├── full_restore.rs
    │   │   └── full_dump.rs
    │   ├── runtime.rs
    │   ├── source
    │   │   ├── mysql_stdin.rs
    │   │   ├── mod.rs
    │   │   ├── mongodb_stdin.rs
    │   │   └── postgres_stdin.rs
    │   ├── transformer
    │   │   ├── transient.rs
    │   │   ├── random.rs
    │   │   ├── phone_number.rs
    │   │   ├── credit_card.rs
    │   │   ├── mod.rs
    │   │   ├── email.rs
    │   │   ├── first_name.rs
    │   │   ├── keep_first_char.rs
    │   │   └── redacted.rs
    │   ├── types.rs
    │   ├── migration
    │   │   ├── update_version_number.rs
    │   │   └── rename_backups_to_dumps.rs
    │   ├── utils.rs
    │   └── cli.rs
    └── Cargo.toml
├── Cargo.toml
├── db
    ├── postgres
    │   └── 01-init.sql
    └── mongodb
    │   └── init-mongo.js
├── examples
    ├── wasm
    │   ├── wasm-transformer-reverse-string.wasm
    │   ├── README.md
    │   └── replibyte.yaml
    ├── destination-postgres.yaml
    ├── source-postgres-with-no-transformers.yaml
    ├── source-postgres-with-env-vars.yaml
    ├── with-local-disk-datastore.yaml
    ├── source-postgres-with-gcp-datastore.yaml
    ├── source-postgres.yaml
    ├── source-mysql-bridge-minio.yaml
    ├── source-and-destination-postgres.yaml
    ├── source-and-dest-mongodb-bridge-minio.yaml
    ├── with-transformer-options.yaml
    ├── with-encryption.yaml
    ├── source-postgres-bridge-minio.yaml
    ├── replibyte.yaml
    ├── with-skip.yaml
    └── with-subset-and-transformer.yaml
├── subset
    ├── README.md
    ├── src
    │   ├── utils.rs
    │   ├── dedup.rs
    │   └── lib.rs
    └── Cargo.toml
├── dump-parser
    ├── src
    │   ├── errors.rs
    │   └── lib.rs
    ├── Cargo.toml
    └── README.md
├── .github
    └── workflows
    │   ├── on-tag.yml
    │   ├── website.yml
    │   ├── publish-image.yaml
    │   ├── build-and-test.yml
    │   └── on-release.yml
├── docker-compose-postgres.yml
├── docker-compose-mongodb.yml
├── docker-compose-mysql.yml
├── docker-compose-postgres-minio.yml
├── docker-compose-mongodb-minio.yml
├── docker-compose-mysql-minio.yml
├── Dockerfile
├── .gitignore
├── release.sh
├── docs
    └── DESIGN.md
├── docker-compose-dev.yml
└── README.md


/website/static/.nojekyll:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docker/exec.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | eval "./replibyte $@"
3 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | target
2 | .git
3 | .gitignore
4 | .iml
5 | .md
6 | 


--------------------------------------------------------------------------------
/assets/video_.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/assets/video_.png


--------------------------------------------------------------------------------
/replibyte/src/commands/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod dump;
2 | pub mod source;
3 | pub mod transformer;
4 | 


--------------------------------------------------------------------------------
/website/docs/design/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Design",
3 |   "position": 10
4 | }
5 | 


--------------------------------------------------------------------------------
/website/docs/guides/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Guides",
3 |   "position": 8
4 | }
5 | 


--------------------------------------------------------------------------------
/assets/RepliByte Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/assets/RepliByte Logo.png


--------------------------------------------------------------------------------
/website/docs/advanced-guides/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Advanced Guides",
3 |   "position": 9
4 | }
5 | 


--------------------------------------------------------------------------------
/website/docs/getting-started/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Getting Started",
3 |   "position": 3
4 | }
5 | 


--------------------------------------------------------------------------------
/website/static/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/favicon.ico


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | exclude = ["db/*", "assets/*"]
3 | members = ["dump-parser", "replibyte", "subset"]
4 | 


--------------------------------------------------------------------------------
/db/postgres/01-init.sql:
--------------------------------------------------------------------------------
1 | -- CREATE USER postgres SUPERUSER;
2 | -- CREATE DATABASE postgres WITH OWNER postgres;
3 | 


--------------------------------------------------------------------------------
/website/docs/guides/deploy-replibyte/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Deploy Replibyte",
3 |   "position": 6
4 | }
5 | 


--------------------------------------------------------------------------------
/website/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
3 | };
4 | 


--------------------------------------------------------------------------------
/website/static/img/datastore/aws/01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/aws/01.png


--------------------------------------------------------------------------------
/website/static/img/datastore/aws/02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/aws/02.png


--------------------------------------------------------------------------------
/website/static/img/datastore/aws/03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/aws/03.png


--------------------------------------------------------------------------------
/website/static/img/datastore/gcp/01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/datastore/gcp/01.jpg


--------------------------------------------------------------------------------
/replibyte/src/connector.rs:
--------------------------------------------------------------------------------
1 | use std::io::Error;
2 | 
3 | pub trait Connector {
4 |     fn init(&mut self) -> Result<(), Error>;
5 | }
6 | 


--------------------------------------------------------------------------------
/website/static/img/tutorial/localeDropdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/tutorial/localeDropdown.png


--------------------------------------------------------------------------------
/examples/wasm/wasm-transformer-reverse-string.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/examples/wasm/wasm-transformer-reverse-string.wasm


--------------------------------------------------------------------------------
/website/static/img/replibyte_dump_and_restore.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/replibyte_dump_and_restore.jpg


--------------------------------------------------------------------------------
/website/static/img/tutorial/docsVersionDropdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Qovery/Replibyte/HEAD/website/static/img/tutorial/docsVersionDropdown.png


--------------------------------------------------------------------------------
/subset/README.md:
--------------------------------------------------------------------------------
1 | # Subset
2 | 
3 | Subset is a Rust crate to scale down a database to a more reasonable size. So it can be used in staging, test and development environments.
4 | 


--------------------------------------------------------------------------------
/examples/wasm/README.md:
--------------------------------------------------------------------------------
1 | # RepliByte with a custom WebAssembly transformer
2 | 
3 | Check out the [official guide here](https://www.replibyte.com/docs/advanced-guides/web-assembly-transformer)
4 | 


--------------------------------------------------------------------------------
/subset/src/utils.rs:
--------------------------------------------------------------------------------
1 | use std::time::{SystemTime, UNIX_EPOCH};
2 | 
3 | pub fn epoch_millis() -> u128 {
4 |     SystemTime::now()
5 |         .duration_since(UNIX_EPOCH)
6 |         .unwrap()
7 |         .as_millis()
8 | }
9 | 


--------------------------------------------------------------------------------
/website/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   // This file is not used in compilation. It is here just for a nice editor experience.
3 |   "extends": "@tsconfig/docusaurus/tsconfig.json",
4 |   "compilerOptions": {
5 |     "baseUrl": "."
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/website/src/components/HomepageFeatures/styles.module.css:
--------------------------------------------------------------------------------
 1 | .features {
 2 |   display: flex;
 3 |   align-items: center;
 4 |   padding: 2rem 0;
 5 |   width: 100%;
 6 | }
 7 | 
 8 | .featureSvg {
 9 |   height: 200px;
10 |   width: 200px;
11 | }
12 | 


--------------------------------------------------------------------------------
/subset/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "subset"
 3 | version = "0.10.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | dump-parser = { path = "../dump-parser" }
10 | tempfile = "3.3"
11 | md5 = "0.7"
12 | 


--------------------------------------------------------------------------------
/examples/destination-postgres.yaml:
--------------------------------------------------------------------------------
 1 | destination:
 2 |   connection_uri: postgres://root:password@localhost:5453/root
 3 | datastore:
 4 |   aws:
 5 |     bucket: replibyte-test
 6 |     region: us-east-2
 7 |     credentials:
 8 |       access_key_id: $AWS_ACCESS_KEY_ID
 9 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
10 | 


--------------------------------------------------------------------------------
/examples/source-postgres-with-no-transformers.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 | datastore:
 4 |   aws:
 5 |     bucket: replibyte-test
 6 |     region: us-east-2
 7 |     credentials:
 8 |       access_key_id: $AWS_ACCESS_KEY_ID
 9 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
10 | 


--------------------------------------------------------------------------------
/website/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependencies
 2 | /node_modules
 3 | 
 4 | # Production
 5 | /build
 6 | 
 7 | # Generated files
 8 | .docusaurus
 9 | .cache-loader
10 | 
11 | # Misc
12 | .DS_Store
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 | 
18 | npm-debug.log*
19 | yarn-debug.log*
20 | yarn-error.log*
21 | 


--------------------------------------------------------------------------------
/website/docs/guides/deploy-replibyte/qovery.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Qovery
3 | sidebar_position: 2
4 | ---
5 | 
6 | # Deploy Replibyte with Qovery
7 | 
8 | To use Replibyte with Qovery, refer to the [Qovery documentation - Lifecycle Job](https://hub.qovery.com/docs/using-qovery/configuration/lifecycle-job/) and this [example](https://github.com/Qovery/lifecycle-job-examples/tree/main/examples/seed-database-with-replibyte).
9 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/mod.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Error;
 2 | 
 3 | use crate::connector::Connector;
 4 | use crate::types::Bytes;
 5 | 
 6 | mod docker;
 7 | pub mod generic_stdout;
 8 | pub mod mongodb;
 9 | pub mod mongodb_docker;
10 | pub mod mysql;
11 | pub mod mysql_docker;
12 | pub mod postgres;
13 | pub mod postgres_docker;
14 | 
15 | pub trait Destination: Connector {
16 |     fn write(&self, data: Bytes) -> Result<(), Error>;
17 | }
18 | 


--------------------------------------------------------------------------------
/replibyte/src/commands/transformer.rs:
--------------------------------------------------------------------------------
 1 | use crate::transformer::transformers;
 2 | use crate::utils::table;
 3 | 
 4 | /// display all transformers available
 5 | pub fn list() {
 6 |     let mut table = table();
 7 |     table.set_titles(row!["name", "description"]);
 8 | 
 9 |     for transformer in transformers() {
10 |         table.add_row(row![transformer.id(), transformer.description()]);
11 |     }
12 | 
13 |     let _ = table.printstd();
14 | }
15 | 


--------------------------------------------------------------------------------
/examples/source-postgres-with-env-vars.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: $DATABASE_URL
 3 |   transformers:
 4 |     - database: public
 5 |       table: employees
 6 |       columns:
 7 |         - name: last_name
 8 |           transformer_name: random
 9 | datastore:
10 |   aws:
11 |     bucket: $BUCKET_NAME
12 |     region: us-east-2
13 |     credentials:
14 |       access_key_id: $AWS_ACCESS_KEY_ID
15 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
16 | 


--------------------------------------------------------------------------------
/dump-parser/src/errors.rs:
--------------------------------------------------------------------------------
 1 | use std::io::ErrorKind;
 2 | 
 3 | #[derive(Debug)]
 4 | pub enum Error {
 5 |     DumpFile(DumpFileError),
 6 | }
 7 | 
 8 | #[derive(Debug)]
 9 | pub enum DumpFileError {
10 |     DoesNotExist,
11 |     ReadError(std::io::Error),
12 |     MalFormatted,
13 | }
14 | 
15 | impl From<DumpFileError> for std::io::Error {
16 |     fn from(err: DumpFileError) -> Self {
17 |         std::io::Error::new(ErrorKind::Other, format!("{:?}", err))
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/replibyte/src/tasks/mod.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Error;
 2 | 
 3 | pub mod full_dump;
 4 | pub mod full_restore;
 5 | 
 6 | pub type TransferredBytes = usize;
 7 | pub type MaxBytes = usize;
 8 | 
 9 | pub trait Task {
10 |     fn run<F: FnMut(TransferredBytes, MaxBytes)>(self, progress_callback: F) -> Result<(), Error>;
11 | }
12 | 
13 | /// inter-thread message for Source/Destination and Datastore
14 | #[derive(Debug, Clone)]
15 | enum Message<T> {
16 |     Data(T),
17 |     EOF,
18 | }
19 | 


--------------------------------------------------------------------------------
/examples/with-local-disk-datastore.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   transformers:
 4 |     - database: public
 5 |       table: employees
 6 |       columns:
 7 |         - name: first_name
 8 |           transformer_name: first-name
 9 |         - name: last_name
10 |           transformer_name: random
11 | datastore:
12 |   local_disk:
13 |     dir: ./my-datastore
14 | destination:
15 |   connection_uri: postgres://root:password@localhost:5453/root
16 | 


--------------------------------------------------------------------------------
/website/src/pages/index.module.css:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * CSS files with the .module.css suffix will be treated as CSS modules
 3 |  * and scoped locally.
 4 |  */
 5 | 
 6 | .heroBanner {
 7 |   padding: 4rem 0;
 8 |   text-align: center;
 9 |   position: relative;
10 |   overflow: hidden;
11 | }
12 | 
13 | @media screen and (max-width: 996px) {
14 |   .heroBanner {
15 |     padding: 2rem;
16 |   }
17 | }
18 | 
19 | .buttons {
20 |   display: flex;
21 |   align-items: center;
22 |   justify-content: center;
23 | }
24 | 


--------------------------------------------------------------------------------
/examples/source-postgres-with-gcp-datastore.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   transformers:
 4 |     - database: public
 5 |       table: employees
 6 |       columns:
 7 |         - name: fist_name
 8 |           transformer_name: first-name
 9 |         - name: last_name
10 |           transformer_name: random
11 | datastore:
12 |   gcp:
13 |     bucket: replibyte-test
14 |     region: us-west1
15 |     access_key: $GS_ACCESS_KEY_ID
16 |     secret: $GS_SECRET_ACCESS_KEY
17 | 


--------------------------------------------------------------------------------
/examples/source-postgres.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   transformers:
 4 |     - database: public
 5 |       table: employees
 6 |       columns:
 7 |         - name: fist_name
 8 |           transformer_name: first-name
 9 |         - name: last_name
10 |           transformer_name: random
11 | datastore:
12 |   aws:
13 |     bucket: replibyte-test
14 |     region: us-east-2
15 |     credentials:
16 |       access_key_id: $AWS_ACCESS_KEY_ID
17 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
18 | 


--------------------------------------------------------------------------------
/replibyte/src/runtime.rs:
--------------------------------------------------------------------------------
 1 | use lazy_static::lazy_static;
 2 | use std::future::Future;
 3 | use std::sync::Mutex;
 4 | use tokio::runtime::{Builder, Runtime};
 5 | 
 6 | lazy_static! {
 7 |     static ref TOKIO_RUNTIME: Mutex<Runtime> = Mutex::new({
 8 |         Builder::new_current_thread()
 9 |             .thread_name("tokio-blocking")
10 |             .enable_all()
11 |             .build()
12 |             .unwrap()
13 |     });
14 | }
15 | 
16 | pub fn block_on<F: Future>(future: F) -> F::Output {
17 |     TOKIO_RUNTIME.lock().unwrap().block_on(future)
18 | }
19 | 


--------------------------------------------------------------------------------
/.github/workflows/on-tag.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     tags:
 4 |       - "v*"
 5 | 
 6 | jobs:
 7 |   release-tag:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Checkout code
11 |         uses: actions/checkout@master
12 |       - name: Create Release
13 |         id: create_release
14 |         uses: actions/create-release@v1
15 |         env:
16 |           GITHUB_TOKEN: ${{ secrets.PERSONAL_TOKEN }}
17 |         with:
18 |           tag_name: ${{ github.ref }}
19 |           release_name: Release ${{ github.ref }}
20 |           draft: false
21 |           prerelease: false
22 | 


--------------------------------------------------------------------------------
/website/docs/design/how-database-subset-works.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 1
 3 | ---
 4 | 
 5 | # How database subset works
 6 | 
 7 | This design doc explains how the database subset has been implemented on Replibyte. 
 8 | 
 9 | :::note
10 | 
11 | Feel free to dig into the [Replibyte Subset](https://github.com/Qovery/Replibyte/tree/main/subset) source-code if you are even more curious.
12 | 
13 | :::
14 | 
15 | ### Relations and virtual relations
16 | 
17 | TODO
18 | 
19 | ### Cyclic references
20 | 
21 | TODO
22 | 
23 | ### Subset Strategy
24 | 
25 | TODO
26 | 
27 | ### Performances
28 | 
29 | TODO
30 | 


--------------------------------------------------------------------------------
/examples/source-mysql-bridge-minio.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: mysql://root:password@127.0.0.1:3306/world
 3 |   transformers:
 4 |     - database: world
 5 |       table: city
 6 |       columns:
 7 |         - name: Name
 8 |           transformer_name: random
 9 | datastore:
10 |   aws:
11 |     bucket: replibyte-test
12 |     region: us-east-2
13 |     credentials:
14 |       access_key_id: minioadmin
15 |       secret_access_key: minioadmin
16 |     endpoint:
17 |       custom: 'http://localhost:9000'
18 | destination:
19 |   # it's different to the source
20 |   connection_uri: mysql://root:password@127.0.0.1:3307/world
21 | 


--------------------------------------------------------------------------------
/.github/workflows/website.yml:
--------------------------------------------------------------------------------
 1 | name: github pages
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   deploy:
10 |     runs-on: ubuntu-18.04
11 |     steps:
12 |       - uses: actions/checkout@v2
13 | 
14 |       - name: Setup Node
15 |         uses: actions/setup-node@v1
16 |         with:
17 |           node-version: '17.x'
18 | 
19 |       - run: cd website && npm install && npm run-script build
20 | 
21 |       - name: Deploy
22 |         uses: peaceiris/actions-gh-pages@v3
23 |         with:
24 |           github_token: ${{ secrets.GITHUB_TOKEN }}
25 |           publish_dir: ./website/build
26 |           cname: www.replibyte.com
27 | 


--------------------------------------------------------------------------------
/examples/source-and-destination-postgres.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   transformers:
 4 |     - database: public
 5 |       table: employees
 6 |       columns:
 7 |         - name: fist_name
 8 |           transformer_name: first-name
 9 |         - name: last_name
10 |           transformer_name: random
11 | destination:
12 |   # it's different to the source
13 |   connection_uri: postgres://root:password@localhost:5453/root
14 | datastore:
15 |   aws:
16 |     bucket: replibyte-test
17 |     region: us-east-2
18 |     credentials:
19 |       access_key_id: $AWS_ACCESS_KEY_ID
20 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
21 | 


--------------------------------------------------------------------------------
/examples/source-and-dest-mongodb-bridge-minio.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: mongodb://root:password@localhost:27017/?authSource=admin
 3 |   transformers:
 4 |     - database: test
 5 |       table: users
 6 |       columns:
 7 |         - name: name
 8 |           transformer_name: first-name
 9 |         - name: age
10 |           transformer_name: random
11 | destination:
12 |   connection_uri: mongodb://root:password@localhost:27018/?authSource=admin
13 | datastore:
14 |   aws:
15 |     bucket: replibyte-test
16 |     region: us-east-2
17 |     credentials:
18 |       access_key_id: minioadmin
19 |       secret_access_key: minioadmin
20 |     endpoint:
21 |       custom: 'http://localhost:9000'
22 | 


--------------------------------------------------------------------------------
/examples/wasm/replibyte.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: mongodb://root:password@localhost:27017/?authSource=admin
 3 |   transformers:
 4 |     - database: test
 5 |       table: users
 6 |       columns:
 7 |         - name: name
 8 |           transformer_name: custom-wasm
 9 |           transformer_options:
10 |             path: "examples/wasm/wasm-transformer-reverse-string.wasm"
11 | datastore:
12 |   aws:
13 |     bucket: replibyte-test
14 |     region: us-east-2
15 |     access_key_id: minioadmin
16 |     secret_access_key: minioadmin
17 |     endpoint:
18 |       custom: 'http://localhost:9000'
19 | destination:
20 |   connection_uri: mongodb://root:password@localhost:27018/?authSource=admin
21 | 


--------------------------------------------------------------------------------
/examples/with-transformer-options.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   transformers:
 4 |     - database: public
 5 |       table: employees
 6 |       columns:
 7 |         - name: last_name
 8 |           transformer_name: redacted
 9 |           transformer_options:
10 |             character: '#'
11 |             width: 12
12 | datastore:
13 |   aws:
14 |     bucket: replibyte-test
15 |     region: us-east-2
16 |     credentials:
17 |       access_key_id: minioadmin
18 |       secret_access_key: minioadmin
19 |     endpoint:
20 |       custom: 'http://localhost:9000'
21 | destination:
22 |   connection_uri: postgres://root:password@localhost:5453/root
23 | 


--------------------------------------------------------------------------------
/examples/with-encryption.yaml:
--------------------------------------------------------------------------------
 1 | encryption_key: this is a secret key
 2 | source:
 3 |   connection_uri: postgres://root:password@localhost:5432/root
 4 |   transformers:
 5 |     - database: public
 6 |       table: employees
 7 |       columns:
 8 |         - name: fist_name
 9 |           transformer_name: first-name
10 |         - name: last_name
11 |           transformer_name: random
12 | destination:
13 |   # it's different to the source
14 |   connection_uri: postgres://root:password@localhost:5453/root
15 | datastore:
16 |   aws:
17 |     bucket: replibyte-test
18 |     region: us-east-2
19 |     credentials:
20 |       access_key_id: $AWS_ACCESS_KEY_ID
21 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
22 | 


--------------------------------------------------------------------------------
/examples/source-postgres-bridge-minio.yaml:
--------------------------------------------------------------------------------
 1 | encryption_key: 'this is a test'
 2 | source:
 3 |   connection_uri: postgres://root:password@localhost:5432/root
 4 |   transformers:
 5 |     - database: public
 6 |       table: employees
 7 |       columns:
 8 |         - name: first_name
 9 |           transformer_name: first-name
10 |         - name: last_name
11 |           transformer_name: random
12 | datastore:
13 |   aws:
14 |     bucket: replibyte-test
15 |     region: us-east-2
16 |     credentials:
17 |       access_key_id: minioadmin
18 |       secret_access_key: minioadmin
19 |     endpoint:
20 |       custom: 'http://localhost:9000'
21 | destination:
22 |   connection_uri: postgres://root:password@localhost:5453/root
23 | 


--------------------------------------------------------------------------------
/db/mongodb/init-mongo.js:
--------------------------------------------------------------------------------
 1 | db.createUser({
 2 |     user: 'root',
 3 |     pwd: 'password',
 4 |     roles: [
 5 |         {
 6 |             role: 'readWrite',
 7 |             db: 'test',
 8 |         },
 9 |     ],
10 | });
11 | 
12 | db = new Mongo().getDB("test");
13 | 
14 | db.createCollection('users', { capped: false });
15 | db.createCollection('states', { capped: false });
16 | db.createCollection('cars', { capped: false });
17 | 
18 | for (let i = 0; i < 10; i++) {
19 |     db.users.insertOne({
20 |         name: 'user' + i,
21 |         age: i,
22 |     });
23 |     db.states.insertOne({
24 |         name: 'state' + i,
25 |         number: i,
26 |     });
27 |     db.cars.insertOne({
28 |         model: 'car' + i,
29 |         year: 2010 + i,
30 |     });
31 | }


--------------------------------------------------------------------------------
/examples/replibyte.yaml:
--------------------------------------------------------------------------------
 1 | encryption_key: $ENCRYPTION_SECRET
 2 | source:
 3 |   connection_uri: $SOURCE_CONNECTION_URI
 4 |   transformers:
 5 |     - database: public # TO CHANGE
 6 |       table: employees # TO CHANGE
 7 |       columns:
 8 |         - name: fist_name # TO CHANGE
 9 |           transformer_name: first-name # TO CHANGE
10 |         - name: last_name # TO CHANGE
11 |           transformer_name: random # TO CHANGE
12 | destination:
13 |   connection_uri: $DESTINATION_CONNECTION_URI
14 |   # Wipe the public schema
15 |   # wipe_database: false (default: true)
16 | datastore:
17 |   aws:
18 |     bucket: $S3_BUCKET
19 |     region: $S3_REGION
20 |     credentials:
21 |       access_key_id: $S3_ACCESS_KEY_ID
22 |       secret_access_key: $S3_SECRET_ACCESS_KEY
23 | 


--------------------------------------------------------------------------------
/examples/with-skip.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   skip:
 4 |     - database: public
 5 |       table: us_states
 6 |     - database: public
 7 |       table: order_details
 8 |   transformers:
 9 |     - database: public
10 |       table: employees
11 |       columns:
12 |         - name: fist_name
13 |           transformer_name: first-name
14 |         - name: last_name
15 |           transformer_name: random
16 | datastore:
17 |   aws:
18 |     bucket: replibyte-test
19 |     region: us-east-2
20 |     credentials:
21 |       access_key_id: minioadmin
22 |       secret_access_key: minioadmin
23 |     endpoint:
24 |       custom: 'http://localhost:9000'
25 | destination:
26 |   connection_uri: postgres://root:password@localhost:5453/root
27 | 


--------------------------------------------------------------------------------
/dump-parser/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "dump-parser"
 3 | version = "0.10.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | bson = "2.2"
10 | serde = "1.0"
11 | 
12 | ########## WARNING #############
13 | # DO NOT UPGRADE THE CRC CRATE #
14 | # version 2 (or higher) is not compatible with the current crc64 algorithm that 'mongorestore' uses in its archive parser.
15 | # mongorestore ECMA: https://go.dev/src/hash/crc64/crc64.go#L28
16 | # crc-rs  1.8  ECMA: https://github.com/mrhooray/crc-rs/blob/1.8.1/build.rs#L41 (COMPATIBLE)
17 | # crc-rs ^2.0  ECMA: https://github.com/akhilles/crc-catalog/blob/2.0.1/src/catalog.rs#L104 (INCOMPATIBLE)
18 | crc = "1.8"
19 | ################################
20 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/generic_stdout.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{stdout, Error, Write};
 2 | 
 3 | use crate::connector::Connector;
 4 | use crate::destination::Destination;
 5 | use crate::types::Bytes;
 6 | 
 7 | /// Stream dump output on stdout
 8 | pub struct GenericStdout {}
 9 | 
10 | impl GenericStdout {
11 |     pub fn new() -> Self {
12 |         GenericStdout {}
13 |     }
14 | }
15 | 
16 | impl Default for GenericStdout {
17 |     fn default() -> Self {
18 |         GenericStdout {}
19 |     }
20 | }
21 | 
22 | impl Connector for GenericStdout {
23 |     fn init(&mut self) -> Result<(), Error> {
24 |         Ok(())
25 |     }
26 | }
27 | 
28 | impl<'a> Destination for GenericStdout {
29 |     fn write(&self, data: Bytes) -> Result<(), Error> {
30 |         let mut stdout = stdout();
31 |         let _ = stdout.write_all(data.as_slice());
32 |         Ok(())
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/website/sidebars.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Creating a sidebar enables you to:
 3 |  - create an ordered group of docs
 4 |  - render a sidebar for each doc of that group
 5 |  - provide next/previous navigation
 6 | 
 7 |  The sidebars can be generated from the filesystem, or explicitly defined here.
 8 | 
 9 |  Create as many sidebars as you want.
10 |  */
11 | 
12 | // @ts-check
13 | 
14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */
15 | const sidebars = {
16 |   // By default, Docusaurus generates a sidebar from the docs folder structure
17 |   tutorialSidebar: [{type: 'autogenerated', dirName: '.'}],
18 | 
19 |   // But you can create a sidebar manually
20 |   /*
21 |   tutorialSidebar: [
22 |     {
23 |       type: 'category',
24 |       label: 'Tutorial',
25 |       items: ['hello'],
26 |     },
27 |   ],
28 |    */
29 | };
30 | 
31 | module.exports = sidebars;
32 | 


--------------------------------------------------------------------------------
/docker-compose-postgres.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   source-postgres:
 5 |     image: postgres:13
 6 |     restart: always
 7 |     healthcheck:
 8 |       test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ]
 9 |       timeout: 45s
10 |       interval: 10s
11 |       retries: 10
12 |     environment:
13 |       - POSTGRES_USER=root
14 |       - POSTGRES_PASSWORD=password
15 |     volumes:
16 |       - ./db/postgres:/docker-entrypoint-initdb.d/
17 |     ports:
18 |       - 5432:5432
19 |   dest-postgres:
20 |     image: postgres:13
21 |     restart: always
22 |     healthcheck:
23 |       test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ]
24 |       timeout: 45s
25 |       interval: 10s
26 |       retries: 10
27 |     environment:
28 |       - POSTGRES_USER=root
29 |       - POSTGRES_PASSWORD=password
30 |     ports:
31 |       - 5453:5432
32 | 


--------------------------------------------------------------------------------
/examples/with-subset-and-transformer.yaml:
--------------------------------------------------------------------------------
 1 | source:
 2 |   connection_uri: postgres://root:password@localhost:5432/root
 3 |   database_subset:
 4 |     database: public
 5 |     table: orders
 6 |     strategy_name: random
 7 |     strategy_options:
 8 |       percent: 50
 9 |     passthrough_tables:
10 |       - us_states
11 |   transformers:
12 |     - database: public
13 |       table: employees
14 |       columns:
15 |         - name: last_name
16 |           transformer_name: redacted
17 |           transformer_options:
18 |             character: '#'
19 |             width: 12
20 | datastore:
21 |   aws:
22 |     bucket: replibyte-test
23 |     region: us-east-2
24 |     credentials:
25 |       access_key_id: minioadmin
26 |       secret_access_key: minioadmin
27 |     endpoint:
28 |       custom: 'http://localhost:9000'
29 | destination:
30 |   connection_uri: postgres://root:password@localhost:5453/root
31 | 


--------------------------------------------------------------------------------
/docker-compose-mongodb.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   source-mongodb:
 5 |     image: mongo:5
 6 |     restart: always
 7 |     healthcheck:
 8 |       test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet
 9 |       timeout: 45s
10 |       interval: 10s
11 |       retries: 10
12 |     environment:
13 |       - MONGO_INITDB_ROOT_USERNAME=root
14 |       - MONGO_INITDB_ROOT_PASSWORD=password
15 |     volumes:
16 |       - ./db/mongo:/docker-entrypoint-initdb.d/
17 |     ports:
18 |       - 27017:27017
19 |   dest-mongodb:
20 |     image: mongo:5
21 |     restart: always
22 |     healthcheck:
23 |       test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet
24 |       timeout: 45s
25 |       interval: 10s
26 |       retries: 10
27 |     environment:
28 |       - MONGO_INITDB_ROOT_USERNAME=root
29 |       - MONGO_INITDB_ROOT_PASSWORD=password
30 |     ports:
31 |       - 27018:27017


--------------------------------------------------------------------------------
/dump-parser/README.md:
--------------------------------------------------------------------------------
 1 | # Dump Parser
 2 | 
 3 | Library to parse and edit database dump for Postgres, MySQL and MongoDB.
 4 | 
 5 | 
 6 | Example for Postgres
 7 | ```rust
 8 | let q = r"
 9 | INSERT INTO public.customers (customer_id, company_name, contact_name, contact_title)
10 | VALUES (1, 'Alfreds Futterkiste', 'Maria Anders', NULL);
11 | ";
12 | 
13 | let mut tokenizer = Tokenizer::new(q);
14 | let tokens_result = tokenizer.tokenize();
15 | assert_eq!(tokens_result.is_ok(), true);
16 | 
17 | let tokens = trim_pre_whitespaces(tokens_result.unwrap());
18 | let column_values = get_column_values_from_insert_into_query(&tokens);
19 | 
20 | assert_eq!(
21 |     column_values,
22 |     vec![
23 |         &Token::Number("1".to_string(), false),
24 |         &Token::SingleQuotedString("Alfreds Futterkiste".to_string()),
25 |         &Token::SingleQuotedString("Maria Anders".to_string()),
26 |         &Token::make_keyword("NULL"),
27 |     ]
28 | );
29 | ```
30 | 


--------------------------------------------------------------------------------
/replibyte/src/source/mysql_stdin.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{stdin, BufReader, Error};
 2 | 
 3 | use crate::connector::Connector;
 4 | use crate::source::mysql::read_and_transform;
 5 | use crate::types::{OriginalQuery, Query};
 6 | use crate::Source;
 7 | use crate::SourceOptions;
 8 | 
 9 | /// Source MySQL dump from STDIN
10 | pub struct MysqlStdin {}
11 | 
12 | impl Default for MysqlStdin {
13 |     fn default() -> Self {
14 |         Self {}
15 |     }
16 | }
17 | 
18 | impl Connector for MysqlStdin {
19 |     fn init(&mut self) -> Result<(), Error> {
20 |         Ok(())
21 |     }
22 | }
23 | 
24 | impl Source for MysqlStdin {
25 |     fn read<F: FnMut(OriginalQuery, Query)>(
26 |         &self,
27 |         options: SourceOptions,
28 |         query_callback: F,
29 |     ) -> Result<(), Error> {
30 |         let reader = BufReader::new(stdin());
31 |         read_and_transform(reader, options, query_callback);
32 | 
33 |         Ok(())
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/website/README.md:
--------------------------------------------------------------------------------
 1 | # Replibyte Website
 2 | 
 3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator.
 4 | 
 5 | ### Installation
 6 | 
 7 | ```
 8 | $ yarn
 9 | ```
10 | 
11 | ### Local Development
12 | 
13 | ```
14 | $ yarn start
15 | ```
16 | 
17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
18 | 
19 | ### Build
20 | 
21 | ```
22 | $ yarn build
23 | ```
24 | 
25 | This command generates static content into the `build` directory and can be served using any static contents hosting service.
26 | 
27 | ### Deployment
28 | 
29 | Using SSH:
30 | 
31 | ```
32 | $ USE_SSH=true yarn deploy
33 | ```
34 | 
35 | Not using SSH:
36 | 
37 | ```
38 | $ GIT_USER=<Your GitHub username> yarn deploy
39 | ```
40 | 
41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
42 | 


--------------------------------------------------------------------------------
/replibyte/src/source/mod.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Error;
 2 | 
 3 | use crate::config::{DatabaseSubsetConfig, OnlyTablesConfig, SkipConfig};
 4 | use crate::connector::Connector;
 5 | use crate::transformer::Transformer;
 6 | use crate::types::{OriginalQuery, Query};
 7 | 
 8 | pub mod mongodb;
 9 | pub mod mongodb_stdin;
10 | pub mod mysql;
11 | pub mod mysql_stdin;
12 | pub mod postgres;
13 | pub mod postgres_stdin;
14 | 
15 | pub trait Explain: Connector {
16 |     fn schema(&self) -> Result<(), Error>;
17 | }
18 | 
19 | pub trait Source: Connector {
20 |     fn read<F: FnMut(OriginalQuery, Query)>(
21 |         &self,
22 |         options: SourceOptions,
23 |         query_callback: F,
24 |     ) -> Result<(), Error>;
25 | }
26 | 
27 | pub struct SourceOptions<'a> {
28 |     pub transformers: &'a Vec<Box<dyn Transformer>>,
29 |     pub skip_config: &'a Vec<SkipConfig>,
30 |     pub database_subset: &'a Option<DatabaseSubsetConfig>,
31 |     pub only_tables: &'a Vec<OnlyTablesConfig>,
32 | }
33 | 


--------------------------------------------------------------------------------
/docker-compose-mysql.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   source-mysql:
 5 |     platform: linux/x86_64
 6 |     image: mysql:8
 7 |     restart: always
 8 |     command: --default-authentication-plugin=mysql_native_password
 9 |     healthcheck:
10 |       test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password']
11 |       timeout: 45s
12 |       interval: 10s
13 |       retries: 10
14 |     environment:
15 |       - MYSQL_ROOT_PASSWORD=password
16 |     volumes:
17 |       - ./db/mysql:/docker-entrypoint-initdb.d
18 |     ports:
19 |       - 3306:3306
20 |   dest-mysql:
21 |     platform: linux/x86_64
22 |     image: mysql:8
23 |     restart: always
24 |     command: --default-authentication-plugin=mysql_native_password
25 |     healthcheck:
26 |       test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password']
27 |       timeout: 45s
28 |       interval: 10s
29 |       retries: 10
30 |     environment:
31 |       - MYSQL_ROOT_PASSWORD=password
32 |     ports:
33 |       - 3307:3306
34 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-image.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   release:
 3 |     types:
 4 |       - published
 5 | 
 6 | jobs:
 7 |   build-linux:
 8 |     runs-on: ubuntu-latest
 9 |     permissions:
10 |       packages: write
11 | 
12 |     steps:
13 |       - name: Set up Docker Buildx
14 |         uses: docker/setup-buildx-action@v2
15 | 
16 |       - uses: docker/login-action@v2
17 |         with:
18 |           registry: ghcr.io
19 |           username: ${{ github.actor }}
20 |           password: ${{ secrets.GITHUB_TOKEN }}
21 | 
22 |       - id: metadata
23 |         uses: docker/metadata-action@v3
24 |         with:
25 |           images: ghcr.io/${{ github.repository }}
26 |           tags: |
27 |             type=semver,pattern={{version}},value=${{ github.event.release.tag_name }}
28 |           # shortcut to create `latest` tag
29 |           flavor: latest=true
30 | 
31 |       - uses: docker/build-push-action@v3
32 |         with:
33 |           push: true
34 |           tags: ${{ steps.metadata.outputs.tags }}
35 |           labels: ${{ steps.metadata.outputs.labels }}
36 | 
37 | 


--------------------------------------------------------------------------------
/replibyte/src/source/mongodb_stdin.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{stdin, BufReader, Error};
 2 | 
 3 | use crate::connector::Connector;
 4 | use crate::source::mongodb::read_and_transform;
 5 | use crate::types::{OriginalQuery, Query};
 6 | use crate::Source;
 7 | use crate::SourceOptions;
 8 | 
 9 | pub struct MongoDBStdin {}
10 | 
11 | impl Default for MongoDBStdin {
12 |     fn default() -> Self {
13 |         Self {}
14 |     }
15 | }
16 | 
17 | impl Connector for MongoDBStdin {
18 |     fn init(&mut self) -> Result<(), Error> {
19 |         Ok(())
20 |     }
21 | }
22 | 
23 | impl Source for MongoDBStdin {
24 |     fn read<F: FnMut(OriginalQuery, Query)>(
25 |         &self,
26 |         options: SourceOptions,
27 |         query_callback: F,
28 |     ) -> Result<(), Error> {
29 |         let reader = BufReader::new(stdin());
30 | 
31 |         if let Some(_database_subset) = &options.database_subset {
32 |             todo!("database subset not supported yet for MongoDB source")
33 |         }
34 | 
35 |         let _ = read_and_transform(reader, options, query_callback)?;
36 |         Ok(())
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/website/src/components/HomepageFeatures/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import clsx from 'clsx';
 3 | import styles from './styles.module.css';
 4 | 
 5 | type FeatureItem = {
 6 |   title: string;
 7 |   Svg: React.ComponentType<React.ComponentProps<'svg'>>;
 8 |   description: JSX.Element;
 9 | };
10 | 
11 | const FeatureList: FeatureItem[] = [
12 | ];
13 | 
14 | function Feature({title, Svg, description}: FeatureItem) {
15 |   return (
16 |     <div className={clsx('col col--4')}>
17 |       <div className="text--center">
18 |         <Svg className={styles.featureSvg} role="img" />
19 |       </div>
20 |       <div className="text--center padding-horiz--md">
21 |         <h3>{title}</h3>
22 |         <p>{description}</p>
23 |       </div>
24 |     </div>
25 |   );
26 | }
27 | 
28 | export default function HomepageFeatures(): JSX.Element {
29 |   return (
30 |     <section className={styles.features}>
31 |       <div className="container">
32 |         <div className="row">
33 |           {FeatureList.map((props, idx) => (
34 |             <Feature key={idx} {...props} />
35 |           ))}
36 |         </div>
37 |       </div>
38 |     </section>
39 |   );
40 | }
41 | 


--------------------------------------------------------------------------------
/replibyte/src/source/postgres_stdin.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{stdin, BufReader, Error};
 2 | 
 3 | use crate::connector::Connector;
 4 | use crate::source::postgres::{read_and_transform, subset};
 5 | use crate::types::{OriginalQuery, Query};
 6 | use crate::Source;
 7 | use crate::SourceOptions;
 8 | 
 9 | /// Source Postgres dump from STDIN
10 | pub struct PostgresStdin {}
11 | 
12 | impl Default for PostgresStdin {
13 |     fn default() -> Self {
14 |         Self {}
15 |     }
16 | }
17 | 
18 | impl Connector for PostgresStdin {
19 |     fn init(&mut self) -> Result<(), Error> {
20 |         Ok(())
21 |     }
22 | }
23 | 
24 | impl Source for PostgresStdin {
25 |     fn read<F: FnMut(OriginalQuery, Query)>(
26 |         &self,
27 |         options: SourceOptions,
28 |         query_callback: F,
29 |     ) -> Result<(), Error> {
30 |         match &options.database_subset {
31 |             None => {
32 |                 let reader = BufReader::new(stdin());
33 |                 read_and_transform(reader, options, query_callback);
34 |             }
35 |             Some(subset_config) => {
36 |                 let dump_reader = BufReader::new(stdin());
37 |                 let reader = subset(dump_reader, subset_config)?;
38 |                 read_and_transform(reader, options, query_callback);
39 |             }
40 |         };
41 | 
42 |         Ok(())
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/website/docs/guides/4-delete-a-dump.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: IV. Delete a dump
 3 | sidebar_position: 4
 4 | ---
 5 | 
 6 | # Delete a dump
 7 | 
 8 | The `dump delete` command comes with 3 different deleting strategies.
 9 | 
10 | 1. Delete a dump by its name
11 | 2. Delete dumps older than a specified number of days
12 | 3. Keep only a maximum number of dumps
13 | 
14 | ### Delete by dump name
15 | 
16 | ```shell
17 | replibyte -c conf.yaml dump delete <DUMP_NAME>
18 | ```
19 | 
20 | This is the simplest strategy you can find.
21 | 
22 | The list of available dumps can be retrieved by running the following command:
23 | 
24 | ```shell
25 | replibyte -c conf.yaml dump list
26 | 
27 | type          name                  size    when                    compressed  encrypted
28 | PostgreSQL    dump-1647706359405    154MB   Yesterday at 03:00 am   true        true
29 | PostgreSQL    dump-1647731334517    152MB   2 days ago at 03:00 am  true        true
30 | PostgreSQL    dump-1647734369306    149MB   3 days ago at 03:00 am  true        true
31 | ```
32 | 
33 | ### Delete dumps older than 2 days
34 | 
35 | ```shell
36 | replibyte -c conf.yaml dump delete --older-than=2d
37 | ```
38 | 
39 | Only the day unit is supported for now, other units could come in the future.
40 | 
41 | ### Keep only the last 10 dumps
42 | 
43 | ```shell
44 | replibyte -c conf.yaml dump delete --keep-last=10
45 | ```
46 | 


--------------------------------------------------------------------------------
/website/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "replibyte",
 3 |   "version": "1.0.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "docusaurus": "docusaurus",
 7 |     "start": "docusaurus start",
 8 |     "build": "docusaurus build",
 9 |     "swizzle": "docusaurus swizzle",
10 |     "deploy": "docusaurus deploy",
11 |     "clear": "docusaurus clear",
12 |     "serve": "docusaurus serve",
13 |     "write-translations": "docusaurus write-translations",
14 |     "write-heading-ids": "docusaurus write-heading-ids",
15 |     "typecheck": "tsc"
16 |   },
17 |   "dependencies": {
18 |     "@docusaurus/core": "2.0.0-beta.18",
19 |     "@docusaurus/preset-classic": "2.0.0-beta.18",
20 |     "@mdx-js/react": "^1.6.22",
21 |     "clsx": "^1.1.1",
22 |     "prism-react-renderer": "^1.3.1",
23 |     "react": "^17.0.2",
24 |     "react-dom": "^17.0.2",
25 |     "mermaid": "^8.11.5",
26 |     "mdx-mermaid": "^1.2.2",
27 |     "@cmfcmf/docusaurus-search-local": "^0.10.0"
28 |   },
29 |   "devDependencies": {
30 |     "@docusaurus/module-type-aliases": "2.0.0-beta.18",
31 |     "@tsconfig/docusaurus": "^1.0.5",
32 |     "typescript": "^4.6.3"
33 |   },
34 |   "browserslist": {
35 |     "production": [
36 |       ">0.5%",
37 |       "not dead",
38 |       "not op_mini all"
39 |     ],
40 |     "development": [
41 |       "last 1 chrome version",
42 |       "last 1 firefox version",
43 |       "last 1 safari version"
44 |     ]
45 |   }
46 | }
47 | 


--------------------------------------------------------------------------------
/website/src/pages/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import clsx from 'clsx';
 3 | import Link from '@docusaurus/Link';
 4 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
 5 | import styles from './index.module.css';
 6 | import {Redirect} from "@docusaurus/router";
 7 | 
 8 | function HomepageHeader() {
 9 |   const {siteConfig} = useDocusaurusContext();
10 |   return (
11 |     <header className={clsx('hero hero--primary', styles.heroBanner)}>
12 |       <div className="container">
13 |         <h1 className="hero__title">{siteConfig.title}</h1>
14 |         <p className="hero__subtitle">{siteConfig.tagline}</p>
15 |         <div className={styles.buttons}>
16 |           <Link
17 |             className="button button--secondary button--lg"
18 |             to="/docs/introduction">
19 |             Getting Started
20 |           </Link>
21 |         </div>
22 |       </div>
23 |     </header>
24 |   );
25 | }
26 | 
27 | // export default function Home(): JSX.Element {
28 | //   const {siteConfig} = useDocusaurusContext();
29 | //   return (
30 | //     <Layout
31 | //       title={`${siteConfig.title} | Seed your development database with real data`}
32 | //       description="Replibyte is a powerful tool to seed your databases with real data and other cool features 🔥">
33 | //       <HomepageHeader />
34 | //       <main>
35 | //         <HomepageFeatures />
36 | //       </main>
37 | //     </Layout>
38 | //   );
39 | // }
40 | 
41 | export default function Home() {
42 |   return <Redirect to='/docs/introduction'/>;
43 | }
44 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/transient.rs:
--------------------------------------------------------------------------------
 1 | use crate::transformer::Transformer;
 2 | use crate::types::Column;
 3 | 
 4 | /// This transformer will not make any changes.
 5 | pub struct TransientTransformer {
 6 |     database_name: String,
 7 |     table_name: String,
 8 |     column_name: String,
 9 | }
10 | 
11 | impl Default for TransientTransformer {
12 |     fn default() -> Self {
13 |         TransientTransformer {
14 |             database_name: String::default(),
15 |             table_name: String::default(),
16 |             column_name: String::default(),
17 |         }
18 |     }
19 | }
20 | 
21 | impl TransientTransformer {
22 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
23 |     where
24 |         S: Into<String>,
25 |     {
26 |         TransientTransformer {
27 |             table_name: table_name.into(),
28 |             column_name: column_name.into(),
29 |             database_name: database_name.into(),
30 |         }
31 |     }
32 | }
33 | 
34 | impl Transformer for TransientTransformer {
35 |     fn id(&self) -> &str {
36 |         "transient"
37 |     }
38 | 
39 |     fn description(&self) -> &str {
40 |         "Does not modify the value."
41 |     }
42 | 
43 |     fn database_name(&self) -> &str {
44 |         self.database_name.as_str()
45 |     }
46 | 
47 |     fn table_name(&self) -> &str {
48 |         self.table_name.as_str()
49 |     }
50 | 
51 |     fn column_name(&self) -> &str {
52 |         self.column_name.as_str()
53 |     }
54 | 
55 |     fn transform(&self, column: Column) -> Column {
56 |         column
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/website/src/css/custom.css:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Any CSS included here will be global. The classic template
 3 |  * bundles Infima by default. Infima is a CSS framework designed to
 4 |  * work well for content-centric websites.
 5 |  */
 6 | 
 7 | /* You can override the default Infima variables here. */
 8 | :root {
 9 |   --ifm-color-primary: #5B50D6;
10 |   --ifm-color-primary-dark: #433AB8;
11 |   --ifm-color-primary-darker: #2F289A;
12 |   --ifm-color-primary-darkest: #130F66;
13 |   --ifm-color-primary-light: #847AE6;
14 |   --ifm-color-primary-lighter: #C2BCFA;
15 |   --ifm-color-primary-lightest: #F2F3FE;
16 |   --ifm-footer-background-color: #f8f8fd;
17 |   --ifm-code-font-size: 95%;
18 | }
19 | 
20 | /* For readability concerns, you should choose a lighter palette in dark mode. */
21 | [data-theme='dark'] {
22 |   --ifm-color-primary: #C2BCFA;
23 |   --ifm-color-primary-dark: #5B50D6;
24 |   --ifm-color-primary-darker: #433AB8;
25 |   --ifm-color-primary-darkest: #2F289A;
26 |   --ifm-color-primary-light: #C2BCFA;
27 |   --ifm-color-primary-lighter: #E0DDFC;
28 |   --ifm-color-primary-lightest: #F2F3FE;
29 |   --ifm-footer-background-color: #151B2B;
30 | }
31 | 
32 | .docusaurus-highlight-code-line {
33 |   background-color: #090c13;
34 |   display: block;
35 |   margin: 0 calc(-1 * var(--ifm-pre-padding));
36 |   padding: 0 var(--ifm-pre-padding);
37 | }
38 | 
39 | [data-theme='dark'] .docusaurus-highlight-code-line {
40 |   background-color: #151B2B;
41 | }
42 | 
43 | [data-theme='dark'] .navbar {
44 |   background-color: #151B2B;
45 | }
46 | 
47 | [data-theme='dark'] {
48 |   background-color: #151B2B;
49 | }
50 | 


--------------------------------------------------------------------------------
/dump-parser/src/lib.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{BufReader, Read};
 2 | 
 3 | use crate::errors::DumpFileError;
 4 | 
 5 | pub mod errors;
 6 | pub mod mongodb;
 7 | pub mod mysql;
 8 | pub mod postgres;
 9 | pub mod utils;
10 | 
11 | #[derive(Debug, PartialOrd, PartialEq, Ord, Eq)]
12 | pub enum Type {
13 |     Postgres,
14 |     Mysql,
15 | }
16 | 
17 | pub trait LogicalDatabase<'a, T>
18 | where
19 |     T: Table,
20 | {
21 |     fn name(&self) -> &str;
22 |     fn tables(&self) -> Result<Vec<T>, DumpFileError>;
23 | }
24 | 
25 | pub trait Table {
26 |     fn rows(&self) -> &'static Vec<Row>;
27 | }
28 | 
29 | #[derive(Debug, Hash, Eq, PartialEq)]
30 | pub struct Row {
31 |     columns: Vec<Column>,
32 | }
33 | 
34 | #[derive(Debug, Hash, Eq, PartialEq)]
35 | pub struct Column {
36 |     name: String,
37 |     value: Vec<u8>,
38 | }
39 | 
40 | pub trait Database<'a, LD, T>
41 | where
42 |     LD: LogicalDatabase<'a, T>,
43 |     T: Table,
44 | {
45 |     fn database_type(&self) -> Type;
46 |     /// list logical databases available
47 |     fn databases<R: Read>(&self, dump_reader: BufReader<R>) -> Result<Vec<LD>, DumpFileError>;
48 |     /// find a logical database by name
49 |     fn get_database<S: Into<&'a str>, R: Read>(
50 |         &self,
51 |         name: S,
52 |         dump_reader: BufReader<R>,
53 |     ) -> Result<Option<LD>, DumpFileError> {
54 |         let databases = self.databases(dump_reader)?;
55 | 
56 |         let db_name = name.into();
57 |         for db in databases {
58 |             if db.name() == db_name {
59 |                 return Ok(Some(db));
60 |             }
61 |         }
62 | 
63 |         Ok(None)
64 |     }
65 | }
66 | 


--------------------------------------------------------------------------------
/replibyte/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | edition = "2021"
 3 | version = "0.10.0"
 4 | name = "replibyte"
 5 | authors = ["Qovery Team", "Fab", "Benny", "Contributos"]
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | 
 9 | [dependencies]
10 | dump-parser = { path = "../dump-parser" }
11 | subset = { path = "../subset" }
12 | rand = "0.8.5"
13 | anyhow = "1.0.56"
14 | serde_yaml = "0.8"
15 | serde_json = "1.0"
16 | aws-config = "0.9.0"
17 | aws-smithy-client = "0.39.0"
18 | aws-smithy-http = "0.39.0"
19 | aws-sdk-s3 = "0.9.0"
20 | aws-types = "0.9.0"
21 | tokio = { version = "1", features = ["full"] }
22 | rustls = "0.20.4"
23 | clap = { version = "3.1", features = ["derive"] }
24 | serde = { version = "1.0", features = ["derive"] }
25 | lazy_static = "1.4.0"
26 | fake = "2.4"
27 | log = "0.4"
28 | env_logger = "0.9"
29 | prettytable-rs = "0.8"
30 | timeago = "0.3"
31 | indicatif = "0.16"
32 | http = "0.2"
33 | flate2 = "1.0"
34 | bson = "2.2"
35 | aes-gcm = "0.9"
36 | which = "4.2.5"
37 | mongodb-schema-parser = { git = "https://github.com/mongodb-rust/mongodb-schema-parser.git", rev = "2d489307dd70b63b216a9968f7dec7c217108b32" }
38 | url = "2.2.2"
39 | tempfile = "3.3"
40 | ctrlc = "3.2.1"
41 | reqwest = { version = "0.11", features = ["blocking"] }
42 | chrono = {version = "0.4", features = ["serde"] }
43 | machine-uid = "0.2"
44 | percent-encoding = "2.1.0"
45 | 
46 | # FIXME removed until the CI release pipeline is fixed
47 | #wasmer = { version = "2.2", optional = true }
48 | wasmer = { version = "2.2" }
49 | # FIXME same as above
50 | #wasmer-wasi = { version = "2.2", optional = true }
51 | wasmer-wasi = { version = "2.2" }
52 | 
53 | # FIXME same as above
54 | #[features]
55 | #wasm = ["wasmer", "wasmer-wasi"]
56 | 


--------------------------------------------------------------------------------
/docker-compose-postgres-minio.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   source-postgres:
 5 |     image: postgres:13
 6 |     restart: always
 7 |     healthcheck:
 8 |       test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ]
 9 |       timeout: 45s
10 |       interval: 10s
11 |       retries: 10
12 |     environment:
13 |       - POSTGRES_USER=root
14 |       - POSTGRES_PASSWORD=password
15 |     volumes:
16 |       - ./db/postgres:/docker-entrypoint-initdb.d/
17 |     ports:
18 |       - 5432:5432
19 |   dest-postgres:
20 |     image: postgres:13
21 |     restart: always
22 |     healthcheck:
23 |       test: [ "CMD", "pg_isready", "-q", "-d", "postgres", "-U", "root" ]
24 |       timeout: 45s
25 |       interval: 10s
26 |       retries: 10
27 |     environment:
28 |       - POSTGRES_USER=root
29 |       - POSTGRES_PASSWORD=password
30 |     ports:
31 |       - 5453:5432
32 |   bridge-minio:
33 |     image: minio/minio:RELEASE.2022-03-17T06-34-49Z
34 |     restart: always
35 |     command: server --console-address ":9001" /data/minio/
36 |     healthcheck:
37 |       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
38 |       interval: 30s
39 |       timeout: 20s
40 |       retries: 3
41 |     environment:
42 |       MINIO_ROOT_USER: minioadmin
43 |       MINIO_ROOT_PASSWORD: minioadmin
44 |     ports:
45 |       - 9000:9000
46 |       - 9001:9001
47 |   create-minio-bucket:
48 |     image: minio/mc
49 |     depends_on:
50 |       - bridge-minio
51 |     entrypoint: >
52 |       /bin/sh -c "
53 |       /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin;
54 |       /usr/bin/mc rm -r --force myminio/replibyte-test;
55 |       /usr/bin/mc mb myminio/replibyte-test;
56 |       /usr/bin/mc policy download myminio/replibyte-test;
57 |       exit 0;
58 |       "
59 | 


--------------------------------------------------------------------------------
/website/docs/contributing.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 11
 3 | ---
 4 | 
 5 | # Contributing
 6 | 
 7 | :::tip
 8 | 
 9 | If you are non-experienced in Rust, consider picking issues with label [good first issue](https://github.com/Qovery/replibyte/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22).
10 | 
11 | :::
12 | 
13 | ## Local development
14 | 
15 | For local development, you will need to:
16 | 
17 | 1. Install [Rust](https://www.rust-lang.org/).
18 | 2. Install [Docker](https://www.docker.com).
19 | 3. Run `docker compose -f ./docker-compose-dev.yml up` to
20 | start the local databases. At the moment, `docker-compose` includes 2 PostgreSQL database instances, 2 MySQL instances, 2 MongoDB instances
21 | and a [MinIO](https://min.io) datastore. One source, one destination by database and one datastore. In the future, we will provide more options.
22 | 
23 | The Minio console is accessible at http://localhost:9001.
24 | 
25 | Once your Docker instances are running, you can run the RepliByte tests, to check if everything is configured correctly:
26 | 
27 | ```shell
28 | AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin cargo test
29 | ```
30 | 
31 | To check that your development environment works well, you can run all the tests locally with:
32 | 
33 | ```shell
34 | cargo test --all
35 | ```
36 | 
37 | ## How to contribute
38 | 
39 | RepliByte is in its early stage of development and need some time to be usable in production. We need some help, and you are welcome to
40 | contribute. To better synchronize consider joining our #replibyte channel on our [Discord](https://discord.qovery.com). Otherwise, you can
41 | pick [opened issues](https://github.com/Qovery/replibyte/issues) and contribute.
42 | 
43 | ## Where should I start?
44 | 
45 | Check [opened issues](https://github.com/Qovery/replibyte/issues). 
46 | 
47 | 


--------------------------------------------------------------------------------
/docker-compose-mongodb-minio.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   source-mongodb:
 5 |     image: mongo:5
 6 |     restart: always
 7 |     healthcheck:
 8 |       test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet
 9 |       timeout: 45s
10 |       interval: 10s
11 |       retries: 10
12 |     environment:
13 |       - MONGO_INITDB_ROOT_USERNAME=root
14 |       - MONGO_INITDB_ROOT_PASSWORD=password
15 |     volumes:
16 |       - ./db/mongodb:/docker-entrypoint-initdb.d/
17 |     ports:
18 |       - 27017:27017
19 |   dest-mongodb:
20 |     image: mongo:5
21 |     restart: always
22 |     healthcheck:
23 |       test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet
24 |       timeout: 45s
25 |       interval: 10s
26 |       retries: 10
27 |     environment:
28 |       - MONGO_INITDB_ROOT_USERNAME=root
29 |       - MONGO_INITDB_ROOT_PASSWORD=password
30 |     ports:
31 |       - 27018:27017
32 |   bridge-minio:
33 |     image: minio/minio:RELEASE.2022-03-17T06-34-49Z
34 |     restart: always
35 |     command: server --console-address ":9001" /data/minio/
36 |     healthcheck:
37 |       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
38 |       interval: 30s
39 |       timeout: 20s
40 |       retries: 3
41 |     environment:
42 |       MINIO_ROOT_USER: minioadmin
43 |       MINIO_ROOT_PASSWORD: minioadmin
44 |     ports:
45 |       - 9000:9000
46 |       - 9001:9001
47 |   create-minio-bucket:
48 |     image: minio/mc
49 |     depends_on:
50 |       - bridge-minio
51 |     entrypoint: >
52 |       /bin/sh -c "
53 |       /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin;
54 |       /usr/bin/mc rm -r --force myminio/replibyte-test;
55 |       /usr/bin/mc mb myminio/replibyte-test;
56 |       /usr/bin/mc policy download myminio/replibyte-test;
57 |       exit 0;
58 |       "


--------------------------------------------------------------------------------
/website/docs/guides/3-subset-a-dump.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: III. Subset a dump
 3 | sidebar_position: 3
 4 | ---
 5 | 
 6 | # Subset a dump
 7 | 
 8 | :::caution
 9 | 
10 | Only PostgreSQL supports *Subsetting* at the moment. Feel free to [contribute](/docs/contributing) to accelerate the support of MySQL and MongoDB 
11 | 
12 | :::
13 | 
14 | Subsetting is a powerful feature to only import a smaller consistent part from your production database. 
15 | 
16 | ## How Subsetting works
17 | 
18 | Check out how subsetting works under the hood [here](/docs/design/how-database-subset-works).
19 | 
20 | ## Configuration
21 | 
22 | Using Subsetting feature is as simple as adding new parameters in your `conf.yaml`
23 | 
24 | ```yaml title="add database_subset object"
25 | source:
26 |   connection_uri: postgres://user:password@host:port/db
27 |   transformers:
28 |     - database: public
29 |       table: customers
30 |       columns:
31 |         - name: first_name
32 |           transformer_name: first-name
33 |         - name: last_name
34 |           transformer_name: random
35 |         - name: contact_phone
36 |           transformer_name: phone-number
37 |         - name: contact_email
38 |           transformer_name: email
39 |   database_subset:
40 |     database: public
41 |     table: customers
42 |     strategy_name: random
43 |     strategy_options:
44 |       percent: 10
45 |     passthrough_tables:
46 |       - product_catalog
47 | ```
48 | 
49 | By applying this configuration, Replibyte will:
50 | 
51 | * Keep around 10% of the full database
52 | * Go down the whole tables linked to `public.customers`
53 | * Keep the whole rows from product_catalog
54 | 
55 | ## Subset Strategy
56 | 
57 | TODO
58 | 
59 | ## Considerations
60 | 
61 | This feature is still under active improvement. Feel free to [open an issue](https://github.com/Qovery/Replibyte/issues/new) if you face any trouble.
62 | 


--------------------------------------------------------------------------------
/docker-compose-mysql-minio.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   source-mysql:
 5 |     image: mysql:8
 6 |     restart: always
 7 |     command: --default-authentication-plugin=mysql_native_password
 8 |     healthcheck:
 9 |       test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password']
10 |       timeout: 45s
11 |       interval: 10s
12 |       retries: 10
13 |     environment:
14 |       - MYSQL_ROOT_PASSWORD=password
15 |     volumes:
16 |       - ./db/mysql:/docker-entrypoint-initdb.d
17 |     ports:
18 |       - 3306:3306
19 |   dest-mysql:
20 |     image: mysql:8
21 |     restart: always
22 |     command: --default-authentication-plugin=mysql_native_password
23 |     healthcheck:
24 |       test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password']
25 |       timeout: 45s
26 |       interval: 10s
27 |       retries: 10
28 |     environment:
29 |       - MYSQL_ROOT_PASSWORD=password
30 |     ports:
31 |       - 3307:3306
32 |   bridge-minio:
33 |     image: minio/minio:RELEASE.2022-03-17T06-34-49Z
34 |     restart: always
35 |     command: server --console-address ":9001" /data/minio/
36 |     healthcheck:
37 |       test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live']
38 |       interval: 30s
39 |       timeout: 20s
40 |       retries: 3
41 |     environment:
42 |       MINIO_ROOT_USER: minioadmin
43 |       MINIO_ROOT_PASSWORD: minioadmin
44 |     ports:
45 |       - 9000:9000
46 |       - 9001:9001
47 |   create-minio-bucket:
48 |     image: minio/mc
49 |     depends_on:
50 |       - bridge-minio
51 |     entrypoint: >
52 |       /bin/sh -c "
53 |       /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin;
54 |       /usr/bin/mc rm -r --force myminio/replibyte-test;
55 |       /usr/bin/mc mb myminio/replibyte-test;
56 |       /usr/bin/mc policy download myminio/replibyte-test;
57 |       exit 0;
58 |       "
59 | 


--------------------------------------------------------------------------------
/website/docs/faq.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 12
 3 | ---
 4 | 
 5 | # FAQ
 6 | 
 7 | :::tip
 8 | 
 9 | [Open an issue](https://github.com/Qovery/replibyte/issues/new) if you don't find the answer to your question.
10 | 
11 | :::
12 | 
13 | ### What language is used for Replibyte?
14 | 
15 | [Rust](https://www.rust-lang.org/)
16 | 
17 | ### Why using Rust?
18 | 
19 | Replibyte is a IO intensive tool that need to process data as fast as possible. Rust is a perfect candidate for high throughput and low
20 | memory consumption.
21 | 
22 | ### Does RepliByte is an ETL?
23 | 
24 | RepliByte is not an ETL like [AirByte](https://github.com/airbytehq/airbyte), [AirFlow](https://airflow.apache.org/), Talend, and it will
25 | never be. If you need to synchronize versatile data sources, you are better choosing a classic ETL. RepliByte is a tool for software
26 | engineers to help them to synchronize data from the same databases. With RepliByte, you can only replicate data from the same type of
27 | databases. As mentioned above, the primary purpose of RepliByte is to duplicate into different environments. You can see RepliByte as a
28 | specific use case of an ETL, where an ETL is more generic.
29 | 
30 | ### Do you support backup from a dump file?
31 | 
32 | absolutely,
33 | 
34 | ```shell
35 | cat dump.sql | replibyte -c conf.yaml backup run -s postgres -i
36 | ```
37 | 
38 | and
39 | 
40 | ```shell
41 | replibyte -c conf.yaml backup run -s postgres -f dump.sql
42 | ```
43 | 
44 | ### How RepliByte can list the dumps? Is there an API?
45 | 
46 | There is no API, RepliByte is fully stateless and store the dump list into the datastore (E.g. S3) via an metadata file.
47 | 
48 | ### How can I contact you?
49 | 
50 | 3 options:
51 | 
52 | 1. [Open an issue](https://github.com/Qovery/replibyte/issues/new).
53 | 2. Join our #replibyte channel on [our discord](https://discord.qovery.com).
54 | 3. Drop us an email to `github+replibyte {at} qovery {dot} com`.
55 | 


--------------------------------------------------------------------------------
/subset/src/dedup.rs:
--------------------------------------------------------------------------------
 1 | use std::fs::{File, OpenOptions};
 2 | use std::io::{BufRead, BufReader, Error, Write};
 3 | use std::path::Path;
 4 | 
 5 | pub type Line<'a> = &'a str;
 6 | pub type GroupHash = String;
 7 | 
 8 | /// Create or find the appropriate file based on the `group_hash` and append the line if it does not already exist.
 9 | pub fn does_line_exist_and_set(
10 |     temp_directory: &Path,
11 |     group_hash: &GroupHash,
12 |     line: Line,
13 | ) -> Result<bool, Error> {
14 |     if does_line_exist(temp_directory, group_hash, line)? {
15 |         return Ok(true);
16 |     }
17 | 
18 |     let file_path = temp_directory.join(group_hash);
19 | 
20 |     // append the line because it does not exist
21 |     let mut file = OpenOptions::new()
22 |         .write(true)
23 |         .append(true)
24 |         .truncate(false)
25 |         .open(file_path.as_path())?;
26 | 
27 |     let line = format!("{}\n", line.trim_start().trim_end());
28 |     let _ = file.write(line.as_bytes())?;
29 | 
30 |     Ok(false)
31 | }
32 | 
33 | pub fn does_line_exist(
34 |     temp_directory: &Path,
35 |     group_hash: &GroupHash,
36 |     line: Line,
37 | ) -> Result<bool, Error> {
38 |     let file_path = temp_directory.join(group_hash);
39 |     let file = match File::open(file_path.as_path()) {
40 |         Ok(file) => file,
41 |         Err(_) => File::create(file_path.as_path())?,
42 |     };
43 | 
44 |     let mut buf = String::new();
45 |     let mut reader = BufReader::new(&file);
46 |     // remove potential whitespaces and \n
47 |     let line = line.trim_start().trim_end();
48 |     while let Ok(amount) = reader.read_line(&mut buf) {
49 |         if amount == 0 {
50 |             // EOF
51 |             break;
52 |         }
53 | 
54 |         if buf.as_str().trim_start().trim_end() == line {
55 |             // the line already exist in the file, we can stop here
56 |             return Ok(true);
57 |         }
58 | 
59 |         let _ = buf.clear();
60 |     }
61 | 
62 |     Ok(false)
63 | }
64 | 


--------------------------------------------------------------------------------
/replibyte/src/commands/source.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{Error, ErrorKind};
 2 | 
 3 | use crate::config::{Config, ConnectionUri};
 4 | use crate::source::Explain;
 5 | use crate::source::mongodb::MongoDB;
 6 | use crate::source::mysql::Mysql;
 7 | use crate::source::postgres::Postgres;
 8 | 
 9 | /// show the database schema
10 | pub fn schema(config: Config) -> anyhow::Result<()> {
11 |     match config.source {
12 |         Some(source) => {
13 |             match source.connection_uri()? {
14 |                 ConnectionUri::Postgres(host, port, username, password, database) => {
15 |                     let postgres = Postgres::new(
16 |                         host.as_str(),
17 |                         port,
18 |                         database.as_str(),
19 |                         username.as_str(),
20 |                         password.as_str(),
21 |                     );
22 | 
23 |                     postgres.schema()?;
24 | 
25 |                     Ok(())
26 |                 }
27 |                 ConnectionUri::Mysql(host, port, username, password, database) => {
28 |                     let mysql = Mysql::new(
29 |                         host.as_str(),
30 |                         port,
31 |                         database.as_str(),
32 |                         username.as_str(),
33 |                         password.as_str(),
34 |                     );
35 | 
36 |                     mysql.schema()?;
37 | 
38 |                     Ok(())
39 |                 }
40 |                 ConnectionUri::MongoDB(uri, database) => {
41 |                     let mongodb = MongoDB::new(uri.as_str(), database.as_str());
42 | 
43 |                     mongodb.schema()?;
44 | 
45 |                     Ok(())
46 |                 }
47 |             }
48 |         }
49 |         None => {
50 |             Err(anyhow::Error::from(Error::new(
51 |                 ErrorKind::Other,
52 |                 "missing <source> object in the configuration file",
53 |             )))
54 |         }
55 |     }
56 | }
57 | 


--------------------------------------------------------------------------------
/website/docs/getting-started/concepts.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 3
 3 | ---
 4 | 
 5 | # Concepts
 6 | 
 7 | To better use Replibyte, here are a list of the different concept to have in mind.
 8 | 
 9 | :::tip
10 | 
11 | I assume you read "[How Replibyte works](/docs/how-replibyte-works)"
12 | 
13 | :::
14 | 
15 | ```mermaid
16 | flowchart LR
17 |     Source --> Subset --> Transformer --> Datastore --> Destination
18 | ```
19 | 
20 | ## Source
21 | 
22 | A source is a database from where Replibyte will create the dump. The database dump can be provided from a file, as a process
23 | input or be created by Replibyte.
24 | 
25 | ## Subset
26 | 
27 | Subsetting data is the process of taking a representative sample of your data in a manner that preserves the integrity of your database, e.g., give me 5% of all transactions or pull all data associated with customers who live in California. If you do this naively, your database will break foreign key constraints, or you’ll end up with a statistically non-representative data sample. Here are a few situations in which you might find subsetting data to be important or necessary:
28 | 1. You’d like to use your production database in staging or test environments but the database is very large so you want to use only a portion of it.
29 | 2. You’d like a test database that contains a few specific rows from production (and related rows from other tables) so you can reproduce a bug.
30 | 3. You want to share data with others, but you don’t want them to have all of it. A common scenario is providing developers an anonymized subset which also enables them to run the test database locally on their own machines.
31 | 
32 | ## Transformer
33 | 
34 | A transformer is a module to alternate the value of a specified column. Replibyte provides pre-made [transformers](/docs/transformers). You can also [build your own Transformer in web assembly](/docs/transformers#wasm).
35 | 
36 | ## Datastore
37 | 
38 | A datastore is where the source dump is stored. E.g a dump can store into a S3 datastore or on a local hard drive.
39 | 
40 | ## Destination
41 | 
42 | A destination is a database where the dump will is restored.
43 | 


--------------------------------------------------------------------------------
/.github/workflows/build-and-test.yml:
--------------------------------------------------------------------------------
 1 | on: [push, pull_request]
 2 | 
 3 | name: Build and Test
 4 | 
 5 | jobs:
 6 |   test:
 7 |     strategy:
 8 |       matrix:
 9 |         os: [ubuntu-latest]
10 |         rust-toolchain: [stable]
11 |       fail-fast: false
12 | 
13 |     runs-on: ${{ matrix.os }}
14 | 
15 |     steps:
16 |       - name: Checkout code
17 |         uses: actions/checkout@v2
18 | 
19 |       - name: Install Rust toolchain
20 |         uses: actions-rs/toolchain@v1
21 |         with:
22 |           toolchain: ${{ matrix.rust-toolchain }}
23 |           components: rustfmt
24 |           override: true
25 | 
26 |       - name: Install Build Essentials
27 |         run: sudo apt-get install build-essential mingw-w64 gcc
28 | 
29 |       - name: Verify versions
30 |         run: rustc --version && rustup --version && cargo --version
31 | 
32 |       - name: Cache build artifacts
33 |         id: cache-cargo
34 |         uses: actions/cache@v2
35 |         with:
36 |           path: |
37 |             ~/.cargo/registry
38 |             ~/.cargo/git
39 |             target
40 |           key: ${{ runner.os }}-cargo-${{ matrix.rust-toolchain }}
41 | 
42 |       - name: Cache integration artifacts
43 |         id: cache-integration
44 |         uses: actions/cache@v2
45 |         with:
46 |           path: |
47 |             tests/integration/runner/node_modules
48 |           key: ${{ runner.os }}-integration-${{ matrix.rust-toolchain }}
49 | 
50 |       - name: Build RepliByte
51 |         run: cargo build --release --all-features
52 | 
53 |       - name: Start Postgres, MySQL, MongoDB and MinIO Containers
54 |         run: docker-compose -f "docker-compose-dev.yml" up -d --build
55 | 
56 |       - name: Test RepliByte
57 |         env:
58 |           AWS_REGION: ${{ secrets.AWS_REGION }}
59 |         run: cargo test --all-features
60 | 
61 |       # - name: Bench RepliByte
62 |       #   run: cargo bench
63 | 
64 |       #- name: Check RepliByte code style
65 |       #  run: cargo fmt -- --check
66 | 
67 |       - name: Stop Postgres, MySQL, MongoDB and MinIO Containers
68 |         if: always()
69 |         run: docker-compose -f "docker-compose-dev.yml" down --remove-orphans
70 | 


--------------------------------------------------------------------------------
/website/docs/databases.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 5
 3 | ---
 4 | 
 5 | # Databases
 6 | 
 7 | Replibyte supports [PostgreSQL](#postgresql), [MySQL](#mysql--mariadb) and [MongoDB](#mongodb) databases.
 8 | 
 9 | ## PostgreSQL
10 | 
11 | :::caution requirements
12 | 
13 | You need `pg_dump` binary locally installed for running `replibyte backup --remote` and `replibyte restore --remote`
14 | 
15 | :::
16 | 
17 | To use PostgreSQL it's as simple as using prefixed connection URI with `postgres://`
18 | 
19 | ```yaml
20 | source:
21 |   connection_uri: postgres://<user>:<password>@<host>:<port>/<database> # you can use $DATABASE_URL
22 | #...
23 | destination:
24 |   connection_uri: postgres://<user>:<password>@<host>:<port>/<database> # you can use $DATABASE_URL
25 | ```
26 | 
27 | ## MySQL / MariaDB
28 | 
29 | :::caution requirements
30 | 
31 | You need `mysqldump` binary locally installed for running `replibyte backup --remote` and `replibyte restore --remote`
32 | 
33 | :::
34 | 
35 | To use MySQL or MariaDB it's as simple as using prefixed connection URI with `mysql://`
36 | 
37 | ```yaml
38 | source:
39 |   connection_uri: mysql://<user>:<password>@<host>:<port>/<database> # you can use $DATABASE_URL
40 | #...
41 | destination:
42 |   connection_uri: mysql://<user>:<password>@<host>:<port>/<database> # you can use $DATABASE_URL
43 | ```
44 | 
45 | ## MongoDB
46 | 
47 | :::caution requirements
48 | 
49 | You need `mongodump` binary locally installed for running `replibyte backup --remote` and `replibyte restore --remote`
50 | 
51 | :::
52 | 
53 | To use MongoDB it's as simple as using prefixed connection URI with `mongodb://`
54 | 
55 | ```yaml
56 | source:
57 |   connection_uri: mongodb://<user>:<password>@<host>:<port>/<database>?<options> # you can use $DATABASE_URL
58 | #...
59 | destination:
60 |   connection_uri: mongodb://<user>:<password>@<host>:<port>/<database>?<options> # you can use $DATABASE_URL
61 | ```
62 | 
63 | 
64 | ## Add another database
65 | 
66 | If you don't find your database, Replibyte is extensible and any database can be supported. You are free to contribute by opening an issue or/and a pull request.
67 | 
68 | To contribute, please see the [contributing](/docs/contributing) page.
69 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rust:1.59-buster as build
 2 | 
 3 | # create a new empty shell project
 4 | RUN USER=root cargo new --bin replibyte
 5 | WORKDIR /replibyte
 6 | RUN USER=root cargo new --lib replibyte
 7 | RUN USER=root cargo new --lib dump-parser
 8 | RUN USER=root cargo new --lib subset
 9 | 
10 | # copy over your manifests
11 | # root
12 | COPY ./Cargo.lock ./Cargo.lock
13 | COPY ./Cargo.toml ./Cargo.toml
14 | 
15 | # dump-parser
16 | COPY ./dump-parser ./dump-parser
17 | 
18 | # subset
19 | COPY ./subset ./subset
20 | 
21 | # replibyte
22 | COPY ./replibyte/Cargo.toml ./replibyte/Cargo.toml
23 | COPY ./replibyte/Cargo.lock ./replibyte/Cargo.lock
24 | 
25 | # this build step will cache your dependencies
26 | RUN cargo build --release
27 | RUN rm src/*.rs
28 | 
29 | # copy your source tree
30 | COPY ./replibyte/src ./replibyte/src
31 | COPY ./dump-parser/src ./dump-parser/src
32 | COPY ./subset/src ./subset/src
33 | 
34 | # build for release
35 | RUN rm ./target/release/deps/replibyte*
36 | RUN cargo build --release
37 | 
38 | # our final base
39 | FROM debian:buster-slim
40 | 
41 | # used to configure Github Packages
42 | LABEL org.opencontainers.image.source https://github.com/qovery/replibyte
43 | 
44 | # Install Postgres and MySQL binaries
45 | RUN apt-get clean && apt-get update && apt-get install -y \
46 |     wget \
47 |     postgresql-client \
48 |     default-mysql-client
49 | 
50 | # Install MongoDB tools
51 | RUN wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-debian92-x86_64-100.5.2.deb && \
52 |     apt install ./mongodb-database-tools-*.deb && \
53 |     rm -f mongodb-database-tools-*.deb && \
54 |     rm -rf /var/lib/apt/lists/*
55 | 
56 | # copy the build artifact from the build stage
57 | COPY --from=build /replibyte/target/release/replibyte .
58 | 
59 | COPY ./docker/* /
60 | RUN chmod +x exec.sh && chmod +x replibyte
61 | 
62 | ARG S3_ACCESS_KEY_ID
63 | ENV S3_ACCESS_KEY_ID $S3_ACCESS_KEY_ID
64 | 
65 | ARG S3_SECRET_ACCESS_KEY
66 | ENV S3_SECRET_ACCESS_KEY $S3_SECRET_ACCESS_KEY
67 | 
68 | ARG S3_REGION
69 | ENV S3_REGION $S3_REGION
70 | 
71 | ARG S3_BUCKET
72 | ENV S3_BUCKET $S3_BUCKET
73 | 
74 | ARG SOURCE_CONNECTION_URI
75 | ENV SOURCE_CONNECTION_URI $SOURCE_CONNECTION_URI
76 | 
77 | ARG DESTINATION_CONNECTION_URI
78 | ENV DESTINATION_CONNECTION_URI $DESTINATION_CONNECTION_URI
79 | 
80 | ARG ENCRYPTION_SECRET
81 | ENV ENCRYPTION_SECRET $ENCRYPTION_SECRET
82 | 
83 | ENTRYPOINT ["sh", "exec.sh"]
84 | 


--------------------------------------------------------------------------------
/replibyte/src/types.rs:
--------------------------------------------------------------------------------
 1 | pub type Bytes = Vec<u8>;
 2 | pub type OriginalQuery = Query;
 3 | 
 4 | pub type Queries = Vec<Query>;
 5 | 
 6 | pub fn to_bytes(queries: Queries) -> Bytes {
 7 |     queries
 8 |         .into_iter()
 9 |         .flat_map(|query| {
10 |             let mut bytes = query.0;
11 |             bytes.push(b'\n');
12 |             bytes
13 |         })
14 |         .collect::<Vec<_>>()
15 | }
16 | 
17 | #[derive(Debug, Clone, Eq, PartialEq, Hash)]
18 | pub struct Query(pub Vec<u8>);
19 | 
20 | impl Query {
21 |     pub fn data(&self) -> &Vec<u8> {
22 |         &self.0
23 |     }
24 | }
25 | 
26 | #[derive(Clone)]
27 | pub struct InsertIntoQuery {
28 |     pub table_name: String,
29 |     pub columns: Vec<Column>,
30 | }
31 | 
32 | #[derive(Clone)]
33 | pub enum Column {
34 |     NumberValue(String, i128),
35 |     FloatNumberValue(String, f64),
36 |     StringValue(String, String),
37 |     CharValue(String, char),
38 |     BooleanValue(String, bool),
39 |     None(String),
40 | }
41 | 
42 | impl Column {
43 |     pub fn name(&self) -> &str {
44 |         match self {
45 |             Column::NumberValue(name, _) => name.as_str(),
46 |             Column::FloatNumberValue(name, _) => name.as_str(),
47 |             Column::StringValue(name, _) => name.as_str(),
48 |             Column::CharValue(name, _) => name.as_str(),
49 |             Column::BooleanValue(name, _) => name.as_str(),
50 |             Column::None(name) => name.as_str(),
51 |         }
52 |     }
53 | 
54 |     pub fn number_value(&self) -> Option<&i128> {
55 |         match self {
56 |             Column::NumberValue(_, value) => Some(value),
57 |             _ => None,
58 |         }
59 |     }
60 | 
61 |     pub fn string_value(&self) -> Option<&str> {
62 |         match self {
63 |             Column::StringValue(_, value) => Some(value.as_str()),
64 |             _ => None,
65 |         }
66 |     }
67 | 
68 |     pub fn float_number_value(&self) -> Option<&f64> {
69 |         match self {
70 |             Column::FloatNumberValue(_, value) => Some(value),
71 |             _ => None,
72 |         }
73 |     }
74 | 
75 |     pub fn char_value(&self) -> Option<&char> {
76 |         match self {
77 |             Column::CharValue(_, value) => Some(value),
78 |             _ => None,
79 |         }
80 |     }
81 | 
82 |     pub fn boolean_value(&self) -> Option<&bool> {
83 |         match self {
84 |             Column::BooleanValue(_, value) => Some(value),
85 |             _ => None,
86 |         }
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/website/docs/how-replibyte-works.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 2
 3 | ---
 4 | 
 5 | # How Replibyte works
 6 | 
 7 | RepliByte is built to seed a development database with production data. Replibyte helps you to:
 8 | 
 9 | 1. Create a development dump from production
10 | 2. [Subset](/docs/design/how-database-subset-works) your production dump into a more reasonable size.
11 | 3. Hide sensitive data via customizable [Transformers](/docs/transformers).
12 | 4. Make your development dump easily accessible from any remote and local databases.
13 | 
14 | ## How creating a Replibyte dump works
15 | 
16 | Here is an example of what happens while replicating a PostgreSQL database.
17 | 
18 | ```mermaid
19 | sequenceDiagram
20 |     participant RepliByte
21 |     participant PostgreSQL (Source)
22 |     participant AWS S3 (Datastore)
23 |     PostgreSQL (Source)->>RepliByte: 1. Dump data
24 |     loop
25 |         RepliByte->>RepliByte: 2. Subsetting (optional)
26 |         RepliByte->>RepliByte: 3. Transform sensitive data (optional)
27 |         RepliByte->>RepliByte: 4. Compress data (optional)
28 |         RepliByte->>RepliByte: 5. Encrypt data (optional)
29 |     end
30 |     RepliByte->>AWS S3 (Datastore): 6. Upload dump data
31 |     RepliByte->>AWS S3 (Datastore): 7. Write index file
32 | ```
33 | 
34 | 1. RepliByte connects to the PostgreSQL Source database and makes a full SQL dump of it.
35 | 2. RepliByte receives the SQL dump, parse it, and generates random/fake information in real-time.
36 | 3. RepliByte streams and uploads the modified SQL dumps in real-time on AWS S3.
37 | 4. RepliByte keeps track of the uploaded SQL dumps by writing it into an index file.
38 | 
39 | 
40 | ## How loading a Replibyte dump works
41 | 
42 | Once at least a replica from the source PostgreSQL database is available in the S3 bucket, RepliByte can use and inject it into the
43 | destination PostgreSQL database.
44 | 
45 | ```mermaid
46 | sequenceDiagram
47 |     participant RepliByte
48 |     participant PostgreSQL (Destination)
49 |     participant AWS S3 (Datastore)
50 |     AWS S3 (Datastore)->>RepliByte: 1. Read index file
51 |     AWS S3 (Datastore)->>RepliByte: 2. Download dump SQL file
52 |     loop
53 |         RepliByte->>RepliByte: 3. Decrypt data (if required)
54 |         RepliByte->>RepliByte: 4. Uncompress data (if required)
55 |     end
56 |     RepliByte->>PostgreSQL (Destination): 5. Restore dump SQL
57 | ```
58 | 
59 | 1. RepliByte connects to the S3 bucket and reads the index file to retrieve the latest SQL to download.
60 | 2. RepliByte downloads the SQL dump in a stream bytes.
61 | 3. RepliByte restores the SQL dump in the destination PostgreSQL database in real-time.
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/random.rs:
--------------------------------------------------------------------------------
 1 | use crate::transformer::Transformer;
 2 | use crate::types::Column;
 3 | use rand::distributions::Alphanumeric;
 4 | use rand::Rng;
 5 | 
 6 | /// This struct is dedicated to generating random elements.
 7 | pub struct RandomTransformer {
 8 |     database_name: String,
 9 |     table_name: String,
10 |     column_name: String,
11 | }
12 | 
13 | impl RandomTransformer {
14 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
15 |     where
16 |         S: Into<String>,
17 |     {
18 |         RandomTransformer {
19 |             table_name: table_name.into(),
20 |             column_name: column_name.into(),
21 |             database_name: database_name.into(),
22 |         }
23 |     }
24 | }
25 | 
26 | impl Default for RandomTransformer {
27 |     fn default() -> Self {
28 |         RandomTransformer {
29 |             database_name: String::default(),
30 |             table_name: String::default(),
31 |             column_name: String::default(),
32 |         }
33 |     }
34 | }
35 | 
36 | impl Transformer for RandomTransformer {
37 |     fn id(&self) -> &str {
38 |         "random"
39 |     }
40 | 
41 |     fn description(&self) -> &str {
42 |         "Randomize value but keep the same length (string only). [AAA]->[BBB]"
43 |     }
44 | 
45 |     fn database_name(&self) -> &str {
46 |         self.database_name.as_str()
47 |     }
48 | 
49 |     fn table_name(&self) -> &str {
50 |         self.table_name.as_str()
51 |     }
52 | 
53 |     fn column_name(&self) -> &str {
54 |         self.column_name.as_str()
55 |     }
56 | 
57 |     fn transform(&self, column: Column) -> Column {
58 |         let mut random = rand::thread_rng();
59 | 
60 |         match column {
61 |             Column::NumberValue(column_name, _) => {
62 |                 Column::NumberValue(column_name, random.gen::<i128>())
63 |             }
64 |             Column::FloatNumberValue(column_name, _) => {
65 |                 Column::FloatNumberValue(column_name, random.gen::<f64>())
66 |             }
67 |             Column::StringValue(column_name, value) => {
68 |                 let new_value = random
69 |                     .sample_iter(&Alphanumeric)
70 |                     .take(value.len())
71 |                     .map(char::from)
72 |                     .collect::<String>();
73 | 
74 |                 Column::StringValue(column_name, new_value)
75 |             }
76 |             Column::CharValue(column_name, _) => {
77 |                 Column::CharValue(column_name, random.gen::<char>())
78 |             }
79 |             Column::BooleanValue(column_name, value) => Column::BooleanValue(column_name, value),
80 |             Column::None(column_name) => Column::None(column_name),
81 |         }
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/phone_number.rs:
--------------------------------------------------------------------------------
 1 | use crate::transformer::Transformer;
 2 | use crate::types::Column;
 3 | use fake::faker::phone_number::raw::PhoneNumber;
 4 | use fake::locales::EN;
 5 | use fake::Fake;
 6 | 
 7 | /// This struct is dedicated to replacing a string by an email address.
 8 | pub struct PhoneNumberTransformer {
 9 |     database_name: String,
10 |     table_name: String,
11 |     column_name: String,
12 | }
13 | 
14 | impl PhoneNumberTransformer {
15 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
16 |     where
17 |         S: Into<String>,
18 |     {
19 |         PhoneNumberTransformer {
20 |             database_name: database_name.into(),
21 |             table_name: table_name.into(),
22 |             column_name: column_name.into(),
23 |         }
24 |     }
25 | }
26 | 
27 | impl Default for PhoneNumberTransformer {
28 |     fn default() -> Self {
29 |         PhoneNumberTransformer {
30 |             database_name: String::default(),
31 |             table_name: String::default(),
32 |             column_name: String::default(),
33 |         }
34 |     }
35 | }
36 | 
37 | impl Transformer for PhoneNumberTransformer {
38 |     fn id(&self) -> &str {
39 |         "phone-number"
40 |     }
41 | 
42 |     fn description(&self) -> &str {
43 |         "Generate a phone number (string only)."
44 |     }
45 | 
46 |     fn database_name(&self) -> &str {
47 |         self.database_name.as_str()
48 |     }
49 | 
50 |     fn table_name(&self) -> &str {
51 |         self.table_name.as_str()
52 |     }
53 | 
54 |     fn column_name(&self) -> &str {
55 |         self.column_name.as_str()
56 |     }
57 | 
58 |     fn transform(&self, column: Column) -> Column {
59 |         match column {
60 |             Column::StringValue(column_name, _) => {
61 |                 Column::StringValue(column_name, PhoneNumber(EN).fake())
62 |             }
63 |             column => column,
64 |         }
65 |     }
66 | }
67 | 
68 | #[cfg(test)]
69 | mod tests {
70 |     use crate::{transformer::Transformer, types::Column};
71 | 
72 |     use super::PhoneNumberTransformer;
73 | 
74 |     #[test]
75 |     fn transform_string_with_a_phone_number() {
76 |         let transformer = get_transformer();
77 |         let column = Column::StringValue("phone_number".to_string(), "+123456789".to_string());
78 |         let transformed_column = transformer.transform(column);
79 |         let transformed_value = transformed_column.string_value().unwrap();
80 | 
81 |         assert!(!transformed_value.is_empty());
82 |         assert_ne!(transformed_value, "+123456789".to_string());
83 |     }
84 | 
85 |     fn get_transformer() -> PhoneNumberTransformer {
86 |         PhoneNumberTransformer::new("github", "users", "phone_number")
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/credit_card.rs:
--------------------------------------------------------------------------------
 1 | use crate::transformer::Transformer;
 2 | use crate::types::Column;
 3 | use fake::faker::creditcard::raw::CreditCardNumber;
 4 | use fake::locales::EN;
 5 | use fake::Fake;
 6 | 
 7 | /// This struct is dedicated to replacing a credit card string.
 8 | pub struct CreditCardTransformer {
 9 |     database_name: String,
10 |     table_name: String,
11 |     column_name: String,
12 | }
13 | 
14 | impl CreditCardTransformer {
15 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
16 |     where
17 |         S: Into<String>,
18 |     {
19 |         CreditCardTransformer {
20 |             database_name: database_name.into(),
21 |             table_name: table_name.into(),
22 |             column_name: column_name.into(),
23 |         }
24 |     }
25 | }
26 | 
27 | impl Default for CreditCardTransformer {
28 |     fn default() -> Self {
29 |         CreditCardTransformer {
30 |             database_name: String::default(),
31 |             table_name: String::default(),
32 |             column_name: String::default(),
33 |         }
34 |     }
35 | }
36 | 
37 | impl Transformer for CreditCardTransformer {
38 |     fn id(&self) -> &str {
39 |         "credit-card"
40 |     }
41 | 
42 |     fn description(&self) -> &str {
43 |         "Generate a credit card number (string only)."
44 |     }
45 | 
46 |     fn database_name(&self) -> &str {
47 |         self.database_name.as_str()
48 |     }
49 | 
50 |     fn table_name(&self) -> &str {
51 |         self.table_name.as_str()
52 |     }
53 | 
54 |     fn column_name(&self) -> &str {
55 |         self.column_name.as_str()
56 |     }
57 | 
58 |     fn transform(&self, column: Column) -> Column {
59 |         match column {
60 |             Column::StringValue(column_name, _value) => {
61 |                 Column::StringValue(column_name, CreditCardNumber(EN).fake())
62 |             }
63 |             column => column,
64 |         }
65 |     }
66 | }
67 | 
68 | #[cfg(test)]
69 | mod tests {
70 |     use crate::{transformer::Transformer, types::Column};
71 | 
72 |     use super::CreditCardTransformer;
73 | 
74 |     #[test]
75 |     fn transform_string_with_a_credit_card() {
76 |         let transformer = get_transformer();
77 |         let column = Column::StringValue("credit_card".to_string(), "4242424242424242".to_string());
78 |         let transformed_column = transformer.transform(column);
79 |         let transformed_value = transformed_column.string_value().unwrap();
80 | 
81 |         assert!(!transformed_value.is_empty());
82 |         assert_ne!(transformed_value, "4242424242424242".to_string());
83 |     }
84 | 
85 |     fn get_transformer() -> CreditCardTransformer {
86 |         CreditCardTransformer::new("github", "users", "credit_card")
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/replibyte/src/migration/update_version_number.rs:
--------------------------------------------------------------------------------
 1 | use std::{
 2 |     io::{Error, ErrorKind},
 3 |     str::FromStr,
 4 | };
 5 | 
 6 | use log::info;
 7 | use serde_json::{json, Value};
 8 | 
 9 | use crate::datastore::Datastore;
10 | 
11 | use super::{Migration, Version};
12 | 
13 | pub struct UpdateVersionNumber<'a> {
14 |     version: &'a str,
15 | }
16 | 
17 | impl<'a> UpdateVersionNumber<'a> {
18 |     pub fn new(version: &'a str) -> Self {
19 |         Self { version }
20 |     }
21 | }
22 | 
23 | impl<'a> Migration for UpdateVersionNumber<'a> {
24 |     fn minimal_version(&self) -> Version {
25 |         Version::from_str("0.7.3").unwrap()
26 |     }
27 | 
28 |     fn run(&self, datastore: &Box<dyn Datastore>) -> Result<(), Error> {
29 |         info!("migrate: update version number");
30 | 
31 |         let mut raw_index_file = datastore.raw_index_file()?;
32 |         let _ = update_version(&mut raw_index_file, self.version)?;
33 |         datastore.write_raw_index_file(&raw_index_file)
34 |     }
35 | }
36 | 
37 | fn update_version(metadata_json: &mut Value, version: &str) -> Result<(), Error> {
38 |     match metadata_json.as_object_mut() {
39 |         Some(metadata) => {
40 |             metadata.insert("v".to_string(), json!(version));
41 |             Ok(())
42 |         }
43 |         None => Err(Error::new(
44 |             ErrorKind::Other,
45 |             "migrate: metadata.json is not an object",
46 |         )),
47 |     }
48 | }
49 | 
50 | #[cfg(test)]
51 | mod tests {
52 |     use serde_json::json;
53 | 
54 |     use crate::migration::update_version_number::update_version;
55 | 
56 |     #[test]
57 |     fn test_update_version() {
58 |         let mut metadata_json = json!({"backups": []});
59 | 
60 |         assert!(update_version(&mut metadata_json, "0.1.0").is_ok());
61 |         assert!(metadata_json.get("v").is_some());
62 |         assert_eq!(metadata_json.get("v").unwrap(), "0.1.0");
63 | 
64 |         let mut metadata_json = json!({
65 |             "backups": [
66 |                 {
67 |                     "directory_name":"dump-1653170039392",
68 |                     "size":62279,
69 |                     "created_at":1234,
70 |                     "compressed":true,
71 |                     "encrypted":false
72 |                 }
73 |             ]
74 |         });
75 |         assert!(update_version(&mut metadata_json, "0.2.0").is_ok());
76 |         assert!(metadata_json.get("v").is_some());
77 |         assert_eq!(metadata_json.get("v").unwrap(), "0.2.0");
78 | 
79 |         let mut metadata_json = json!({"v": "0.7.3", "backups": []});
80 |         assert!(update_version(&mut metadata_json, "0.7.4").is_ok());
81 |         assert!(metadata_json.get("v").is_some());
82 |         assert_eq!(metadata_json.get("v").unwrap(), "0.7.4");
83 |     }
84 | }
85 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ### JetBrains template
  2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
  3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
  4 | 
  5 | # User-specific stuff
  6 | .idea/**/workspace.xml
  7 | .idea/**/tasks.xml
  8 | .idea/**/usage.statistics.xml
  9 | .idea/**/dictionaries
 10 | .idea/**/shelf
 11 | 
 12 | # Generated files
 13 | .idea/**/contentModel.xml
 14 | 
 15 | # Sensitive or high-churn files
 16 | .idea/**/dataSources/
 17 | .idea/**/dataSources.ids
 18 | .idea/**/dataSources.local.xml
 19 | .idea/**/sqlDataSources.xml
 20 | .idea/**/dynamic.xml
 21 | .idea/**/uiDesigner.xml
 22 | .idea/**/dbnavigator.xml
 23 | 
 24 | # Gradle
 25 | .idea/**/gradle.xml
 26 | .idea/**/libraries
 27 | 
 28 | # Gradle and Maven with auto-import
 29 | # When using Gradle or Maven with auto-import, you should exclude module files,
 30 | # since they will be recreated, and may cause churn.  Uncomment if using
 31 | # auto-import.
 32 | # .idea/artifacts
 33 | # .idea/compiler.xml
 34 | # .idea/jarRepositories.xml
 35 | # .idea/modules.xml
 36 | # .idea/*.iml
 37 | # .idea/modules
 38 | # *.iml
 39 | # *.ipr
 40 | 
 41 | # CMake
 42 | cmake-build-*/
 43 | 
 44 | # Mongo Explorer plugin
 45 | .idea/**/mongoSettings.xml
 46 | 
 47 | # File-based project format
 48 | *.iws
 49 | 
 50 | # IntelliJ
 51 | out/
 52 | 
 53 | # mpeltonen/sbt-idea plugin
 54 | .idea_modules/
 55 | 
 56 | # JIRA plugin
 57 | atlassian-ide-plugin.xml
 58 | 
 59 | # Cursive Clojure plugin
 60 | .idea/replstate.xml
 61 | 
 62 | # Crashlytics plugin (for Android Studio and IntelliJ)
 63 | com_crashlytics_export_strings.xml
 64 | crashlytics.properties
 65 | crashlytics-build.properties
 66 | fabric.properties
 67 | 
 68 | # Editor-based Rest Client
 69 | .idea/httpRequests
 70 | 
 71 | # Android studio 3.1+ serialized cache file
 72 | .idea/caches/build_file_checksums.ser
 73 | 
 74 | ### Go template
 75 | # Binaries for programs and plugins
 76 | *.exe
 77 | *.exe~
 78 | *.dll
 79 | *.so
 80 | *.dylib
 81 | 
 82 | # Test binary, built with `go test -c`
 83 | *.test
 84 | 
 85 | # Output of the go coverage tool, specifically when used with LiteIDE
 86 | *.out
 87 | 
 88 | # Dependency directories (remove the comment below to include it)
 89 | # vendor/
 90 | 
 91 | /target/
 92 | 
 93 | # Configuration files
 94 | prod-conf.yaml
 95 | .prod-conf.yaml
 96 | 
 97 | 
 98 | # Dependencies
 99 | /node_modules
100 | 
101 | # Production
102 | /build
103 | 
104 | # Generated files
105 | .docusaurus
106 | .cache-loader
107 | 
108 | # Misc
109 | .DS_Store
110 | .env.local
111 | .env.development.local
112 | .env.test.local
113 | .env.production.local
114 | 
115 | npm-debug.log*
116 | yarn-debug.log*
117 | yarn-error.log*
118 | 
119 | /my-datastore
120 | 
121 | *.release


--------------------------------------------------------------------------------
/website/docs/getting-started/installation.mdx:
--------------------------------------------------------------------------------
  1 | ---
  2 | sidebar_position: 2
  3 | ---
  4 | 
  5 | # Installation
  6 | 
  7 | <details>
  8 | 
  9 |   <summary>Install on MacOSX</summary>
 10 | 
 11 |   ```shell
 12 |   brew tap Qovery/replibyte
 13 |   brew install replibyte
 14 |   ```
 15 | 
 16 |   Or [manually](https://github.com/Qovery/replibyte/releases).
 17 | 
 18 | </details>
 19 | 
 20 | <details>
 21 | 
 22 |   <summary>Install on Linux</summary>
 23 | 
 24 |   ```shell
 25 |   # download latest replibyte archive for Linux
 26 |   curl -s https://api.github.com/repos/Qovery/replibyte/releases/latest | \
 27 |   jq -r '.assets[].browser_download_url' | \
 28 |   grep -i 'linux-musl.tar.gz$' | wget -qi - && \
 29 | 
 30 |   # unarchive
 31 |   tar zxf *.tar.gz
 32 | 
 33 |   # make replibyte executable
 34 |   chmod +x replibyte
 35 | 
 36 |   # make it accessible from everywhere
 37 |   mv replibyte /usr/local/bin/
 38 |   ```
 39 | 
 40 | </details>
 41 | 
 42 | <details>
 43 | 
 44 |   <summary>Install on Windows</summary>
 45 | 
 46 |   Download [the latest Windows release](https://github.com/Qovery/replibyte/releases) and install it.
 47 | 
 48 | </details>
 49 | 
 50 | <details>
 51 | 
 52 |   <summary>Install from source</summary>
 53 | 
 54 |   ```shell
 55 |   git clone https://github.com/Qovery/replibyte.git && cd replibyte
 56 | 
 57 |   # Install cargo
 58 |   # visit: https://doc.rust-lang.org/cargo/getting-started/installation.html
 59 | 
 60 |   # Build with cargo
 61 |   cargo build --release
 62 | 
 63 |   # Run RepliByte
 64 |   ./target/release/replibyte -h
 65 |   ```
 66 | 
 67 | </details>
 68 | 
 69 | <details>
 70 | 
 71 |   <summary>Run replibyte with Docker</summary>
 72 | 
 73 |   This example assume you have a configuration file named replibyte.yaml in the directory you're running the docker command.
 74 | 
 75 |   ```shell
 76 |   docker run -it --rm --name replibyte \
 77 |     -v "$(pwd)/replibyte.yaml:/replibyte.yaml:ro" \
 78 |     ghcr.io/qovery/replibyte --config replibyte.yaml
 79 |   ```
 80 | 
 81 |   If you're using the `local_disk` datastore, you must mount a volume by adding `-v "$(pwd)/my-datastore:/datastore"`.
 82 |   This assumes that the "datastore" part of your config file is as follows:
 83 | 
 84 |   ```yaml
 85 |   datastore:
 86 |     local_disk:
 87 |       dir: ./my-datastore
 88 |   ```
 89 | 
 90 | </details>
 91 | 
 92 | ## Telemetry
 93 | 
 94 | RepliByte collects anonymized data from users in order to improve our product. Feel free to inspect the
 95 | code [here](https://github.com/Qovery/replibyte/blob/main/replibyte/src/telemetry.rs). This can be deactivated at any time, and any data that has already been collected can be deleted on
 96 | request (hello+replibyte {at} qovery {dot} com).
 97 | 
 98 | ### Collected data
 99 | 
100 | - Command line parameters
101 | - Options used (subset, transformer, compression) in the configuration file.
102 | 


--------------------------------------------------------------------------------
/release.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | set -e
  4 | 
  5 | TOML_FILES="\
  6 | replibyte/Cargo.toml \
  7 | subset/Cargo.toml \
  8 | dump-parser/Cargo.toml
  9 | "
 10 | 
 11 | old=$1
 12 | new=$2
 13 | 
 14 | if [ -z "${old}" ] || [ -z "${new}" ]
 15 | then
 16 |     echo "please run: $0 <old version> <new version>"
 17 |     exit 1
 18 | fi
 19 | 
 20 | if [ "$(git status --porcelain=v1 2>/dev/null | wc -l)" -ne 0 ]
 21 | then
 22 |     git status
 23 |     echo "There are unsaved changes in the repository, press CTRL-C to abort now or return to continue."
 24 |     read -r answer
 25 | fi
 26 | 
 27 | echo -n "Release process starting from '${old}' -> '${new}', do you want to continue? [y/N] "
 28 | read -r answer
 29 | 
 30 | 
 31 | case "${answer}" in
 32 |     Y*|y*)
 33 |         ;;
 34 |     *)
 35 |         echo "Aborting"
 36 |         exit 0
 37 |         ;;
 38 | esac;
 39 | 
 40 | echo "==> ${answer}"
 41 | 
 42 | echo -n "Updating TOML files:"
 43 | for toml in ${TOML_FILES}
 44 | do
 45 |     echo -n " ${toml}"
 46 |     sed -e "s/^version = \"${old}\"$/version = \"${new}\"/" -i.release "${toml}"
 47 | done
 48 | echo "."
 49 | 
 50 | echo "Please review the following changes. (return to continue)"
 51 | read -r answer
 52 | 
 53 | git diff
 54 | 
 55 | echo "Do you want to Continue or Rollback? [c/R]"
 56 | read -r answer
 57 | 
 58 | case "${answer}" in
 59 |     C*|c*)
 60 |         git checkout -b "release-v${new}"
 61 |         git commit -sa -m "Release v${new}"
 62 |         git push --set-upstream origin "release-v${new}"
 63 |         ;;
 64 |     *)
 65 |         git checkout .
 66 |         exit
 67 |         ;;
 68 | esac;
 69 | 
 70 | echo "Please open the following pull request we'll wait here continue when it is merged."
 71 | echo
 72 | echo "  >> https://github.com/qovery/replibyte/pull/new/release-v${new} <<"
 73 | echo
 74 | echo "Once you continue we'll generate and push the release tag with the latest 'main'"
 75 | echo
 76 | echo "WARNING: Review and wait until the pull request is merged before continuing to create the release"
 77 | read -r answer
 78 | 
 79 | echo "Generating release tag v${new}"
 80 | 
 81 | git checkout main
 82 | git pull
 83 | 
 84 | # The version is correctly updated in the replibyte crate cargo.toml (aka the PR is merged)
 85 | if grep -q  "version = \"${new}\"" ${TOML_FILES[0]}; then
 86 |     git tag -a -m"Release v${new}" "v${new}"
 87 |     git push --tags
 88 | 
 89 |     echo "Congrats release v${new} is done!"
 90 | else
 91 |     echo
 92 |     echo "It seems the version is not updated, are you sure you have merged the pull request as stated before?"
 93 |     echo "If that's not the case, you're invited to run again the release script and wait for the PR is merged before continuing."
 94 |     echo
 95 |     echo "Rollback changes"
 96 | 
 97 |     git branch -d "release-v${new}"
 98 |     git push origin --delete "release-v${new}"
 99 | fi
100 | 


--------------------------------------------------------------------------------
/replibyte/src/utils.rs:
--------------------------------------------------------------------------------
 1 | use prettytable::{format, Table};
 2 | use std::io::{Error, ErrorKind, Read};
 3 | use std::process::Child;
 4 | use std::time::{SystemTime, UNIX_EPOCH};
 5 | use which::which;
 6 | 
 7 | pub fn epoch_millis() -> u128 {
 8 |     SystemTime::now()
 9 |         .duration_since(UNIX_EPOCH)
10 |         .unwrap()
11 |         .as_millis()
12 | }
13 | 
14 | pub fn table() -> Table {
15 |     // Create the table
16 |     let mut table = Table::new();
17 | 
18 |     table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
19 | 
20 |     table
21 | }
22 | 
23 | /// converts Bytes into Bytes, KB, MB, GB or TB
24 | pub fn to_human_readable_unit(bytes: usize) -> String {
25 |     match bytes {
26 |         0..=1023 => format!("{} Bytes", bytes),
27 |         1024..=1023_999 => format!("{:.2} kB", bytes / 1000),
28 |         1024_000..=1023_999_999 => format!("{:.2} MB", bytes / 1_000_000),
29 |         1024_000_000..=1023_999_999_999 => format!("{:.2} MB", bytes / 1_000_000_000),
30 |         1024_000_000_000..=1023_999_999_999_999 => format!("{:.2} GB", bytes / 1_000_000_000_000),
31 |         _ => format!("{:.2} TB", bytes / 1_000_000_000_000_000),
32 |     }
33 | }
34 | 
35 | /// check for binary presence in PATH
36 | pub fn binary_exists(binary_name: &str) -> Result<(), Error> {
37 |     let _ = which(binary_name).map_err(|_| {
38 |         Error::new(
39 |             ErrorKind::Other,
40 |             format!("cannot find '{}' binary in path", binary_name),
41 |         )
42 |     })?;
43 | 
44 |     Ok(())
45 | }
46 | 
47 | // wait for the end of a process and handle errors
48 | pub fn wait_for_command(process: &mut Child) -> Result<(), Error> {
49 |     match process.wait() {
50 |         Ok(exit_status) => {
51 |             if !exit_status.success() {
52 |                 if let Some(stderr) = process.stderr.take().as_mut() {
53 |                     let mut buffer = String::new();
54 |                     let error = match stderr.read_to_string(&mut buffer) {
55 |                         Ok(_) => Error::new(ErrorKind::Other, format!("{}", buffer)),
56 |                         Err(err) => Error::new(ErrorKind::Other, format!("{}", err)),
57 |                     };
58 | 
59 |                     return Err(Error::new(
60 |                         ErrorKind::Other,
61 |                         format!("command error: {}", error),
62 |                     ));
63 |                 }
64 | 
65 |                 return Err(Error::new(
66 |                     ErrorKind::Other,
67 |                     format!("command error: {}", exit_status.to_string()),
68 |                 ));
69 |             }
70 | 
71 |             Ok(())
72 |         }
73 |         Err(err) => Err(Error::new(
74 |             ErrorKind::Other,
75 |             format!("command error: {}", err),
76 |         )),
77 |     }
78 | }
79 | 
80 | pub fn get_replibyte_version() -> &'static str {
81 |     env!("CARGO_PKG_VERSION")
82 | }
83 | 


--------------------------------------------------------------------------------
/replibyte/src/tasks/full_restore.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Error;
 2 | use std::sync::mpsc;
 3 | use std::thread;
 4 | 
 5 | use crate::datastore::{Datastore, ReadOptions};
 6 | use crate::destination::Destination;
 7 | use crate::tasks::{MaxBytes, Message, Task, TransferredBytes};
 8 | use crate::types::Bytes;
 9 | 
10 | /// FullRestoreTask is a wrapping struct to execute the synchronization between a *Datastore* and a *Source*.
11 | pub struct FullRestoreTask<'a, D>
12 | where
13 |     D: Destination,
14 | {
15 |     destination: &'a mut D,
16 |     datastore: Box<dyn Datastore>,
17 |     read_options: ReadOptions,
18 | }
19 | 
20 | impl<'a, D> FullRestoreTask<'a, D>
21 | where
22 |     D: Destination,
23 | {
24 |     pub fn new(
25 |         destination: &'a mut D,
26 |         datastore: Box<dyn Datastore>,
27 |         read_options: ReadOptions,
28 |     ) -> Self {
29 |         FullRestoreTask {
30 |             destination,
31 |             datastore,
32 |             read_options,
33 |         }
34 |     }
35 | }
36 | 
37 | impl<'a, D> Task for FullRestoreTask<'a, D>
38 | where
39 |     D: Destination,
40 | {
41 |     fn run<F: FnMut(TransferredBytes, MaxBytes)>(
42 |         mut self,
43 |         mut progress_callback: F,
44 |     ) -> Result<(), Error> {
45 |         // initialize the destination
46 |         let _ = self.destination.init()?;
47 | 
48 |         // bound to 1 to avoid eating too much memory if we download the dump faster than we ingest it
49 |         let (tx, rx) = mpsc::sync_channel::<Message<Bytes>>(1);
50 |         let datastore = self.datastore;
51 | 
52 |         let mut index_file = datastore.index_file()?;
53 |         let dump = index_file.find_dump(&self.read_options)?;
54 | 
55 |         // init progress
56 |         progress_callback(0, dump.size);
57 | 
58 |         let read_options = self.read_options.clone();
59 | 
60 |         let join_handle = thread::spawn(move || {
61 |             // managing Datastore (S3) download here
62 |             let datastore = datastore;
63 |             let read_options = read_options;
64 | 
65 |             let _ = match datastore.read(&read_options, &mut |data| {
66 |                 let _ = tx.send(Message::Data(data));
67 |             }) {
68 |                 Ok(_) => {}
69 |                 Err(err) => panic!("{:?}", err),
70 |             };
71 | 
72 |             let _ = tx.send(Message::EOF);
73 |         });
74 | 
75 |         loop {
76 |             let data = match rx.recv() {
77 |                 Ok(Message::Data(data)) => data,
78 |                 Ok(Message::EOF) => break,
79 |                 Err(err) => panic!("{:?}", err), // FIXME what should I do here?
80 |             };
81 | 
82 |             progress_callback(data.len(), dump.size);
83 | 
84 |             let _ = self.destination.write(data)?;
85 |         }
86 | 
87 |         // wait for end of download execution
88 |         let _ = join_handle.join(); // FIXME catch result here
89 | 
90 |         progress_callback(dump.size, dump.size);
91 | 
92 |         Ok(())
93 |     }
94 | }
95 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/mod.rs:
--------------------------------------------------------------------------------
 1 | use crate::transformer::credit_card::CreditCardTransformer;
 2 | use crate::transformer::custom_wasm::CustomWasmTransformer;
 3 | use crate::transformer::email::EmailTransformer;
 4 | use crate::transformer::first_name::FirstNameTransformer;
 5 | use crate::transformer::keep_first_char::KeepFirstCharTransformer;
 6 | use crate::transformer::phone_number::PhoneNumberTransformer;
 7 | use crate::transformer::random::RandomTransformer;
 8 | use crate::transformer::redacted::RedactedTransformer;
 9 | use crate::transformer::transient::TransientTransformer;
10 | use crate::types::Column;
11 | 
12 | pub mod credit_card;
13 | pub mod email;
14 | pub mod first_name;
15 | pub mod keep_first_char;
16 | pub mod phone_number;
17 | pub mod random;
18 | pub mod redacted;
19 | pub mod transient;
20 | 
21 | // FIXME: CI release build is broken because of feature flag
22 | //#[cfg(feature = "wasm")]
23 | pub mod custom_wasm;
24 | 
25 | pub fn transformers() -> Vec<Box<dyn Transformer>> {
26 |     vec![
27 |         Box::new(EmailTransformer::default()),
28 |         Box::new(FirstNameTransformer::default()),
29 |         Box::new(PhoneNumberTransformer::default()),
30 |         Box::new(RandomTransformer::default()),
31 |         Box::new(KeepFirstCharTransformer::default()),
32 |         Box::new(TransientTransformer::default()),
33 |         Box::new(CreditCardTransformer::default()),
34 |         Box::new(RedactedTransformer::default()),
35 |         Box::new(CustomWasmTransformer::default()),
36 |     ]
37 | }
38 | 
39 | /// Trait to implement to create a custom Transformer.
40 | pub trait Transformer {
41 |     fn id(&self) -> &str;
42 |     fn description(&self) -> &str;
43 |     fn database_name(&self) -> &str;
44 |     fn table_name(&self) -> &str;
45 |     fn column_name(&self) -> &str;
46 |     fn quoted_table_name(&self) -> String {
47 |         let table_name = self.table_name();
48 | 
49 |         if table_name.to_lowercase() != table_name {
50 |             return format!("\"{}\"", table_name);
51 |         }
52 | 
53 |         String::from(table_name)
54 |     }
55 | 
56 |     fn database_and_table_name(&self) -> String {
57 |         format!("{}.{}", self.database_name(), self.table_name())
58 |     }
59 | 
60 |     fn database_and_table_and_column_name(&self) -> String {
61 |         format!(
62 |             "{}.{}.{}",
63 |             self.database_name(),
64 |             self.table_name(),
65 |             self.column_name()
66 |         )
67 |     }
68 | 
69 |     fn database_and_quoted_table_and_column_name(&self) -> String {
70 |         format!(
71 |             "{}.{}.{}",
72 |             self.database_name(),
73 |             self.quoted_table_name(),
74 |             self.column_name()
75 |         )
76 |     }
77 | 
78 |     fn table_and_column_name(&self) -> String {
79 |         format!(
80 |             "{}.{}",
81 |             self.table_name(),
82 |             self.column_name()
83 |         )
84 |     }
85 | 
86 |     fn transform(&self, column: Column) -> Column;
87 | }
88 | 


--------------------------------------------------------------------------------
/replibyte/src/migration/rename_backups_to_dumps.rs:
--------------------------------------------------------------------------------
 1 | use std::{
 2 |     io::{Error, ErrorKind},
 3 |     str::FromStr,
 4 | };
 5 | 
 6 | use log::info;
 7 | use serde_json::{json, Value};
 8 | 
 9 | use crate::datastore::Datastore;
10 | 
11 | use super::{Migration, Version};
12 | 
13 | pub struct RenameBackupsToDump {}
14 | 
15 | impl RenameBackupsToDump {
16 |     pub fn default() -> Self {
17 |         Self {}
18 |     }
19 | }
20 | 
21 | impl Migration for RenameBackupsToDump {
22 |     fn minimal_version(&self) -> Version {
23 |         Version::from_str("0.7.3").unwrap()
24 |     }
25 | 
26 |     fn run(&self, datastore: &Box<dyn Datastore>) -> Result<(), Error> {
27 |         info!("migrate: rename backups to dumps");
28 | 
29 |         let mut raw_index_file = datastore.raw_index_file()?;
30 |         let _ = rename_backups_to_dumps(&mut raw_index_file)?;
31 |         datastore.write_raw_index_file(&raw_index_file)
32 |     }
33 | }
34 | 
35 | fn rename_backups_to_dumps(metadata_json: &mut Value) -> Result<(), Error> {
36 |     match metadata_json.as_object_mut() {
37 |         Some(metadata) => {
38 |             // we rename the `backups` key to `dumps`
39 |             if metadata.contains_key("backups") {
40 |                 let backups = metadata.get("backups").unwrap_or(&json!([])).clone();
41 |                 metadata.insert("dumps".to_string(), backups);
42 |                 metadata.remove("backups");
43 |             }
44 |             Ok(())
45 |         }
46 |         None => Err(Error::new(
47 |             ErrorKind::Other,
48 |             "migrate: metadata.json is not an object",
49 |         )),
50 |     }
51 | }
52 | 
53 | #[cfg(test)]
54 | mod tests {
55 |     use serde_json::json;
56 | 
57 |     use crate::migration::rename_backups_to_dumps::rename_backups_to_dumps;
58 | 
59 |     #[test]
60 |     fn test_rename_backup_to_dumps() {
61 |         let mut metadata_json = json!({"backups": []});
62 |         assert!(rename_backups_to_dumps(&mut metadata_json).is_ok());
63 |         assert!(metadata_json.get("backups").is_none());
64 |         assert!(metadata_json.get("dumps").is_some());
65 |         assert!(metadata_json.get("dumps").unwrap().is_array());
66 | 
67 |         let mut metadata_json = json!({
68 |             "backups": [
69 |                 {
70 |                     "directory_name":"dump-1653170039392",
71 |                     "size":62279,
72 |                     "created_at":1234,
73 |                     "compressed":true,
74 |                     "encrypted":false
75 |                 }
76 |             ]
77 |         });
78 |         assert!(rename_backups_to_dumps(&mut metadata_json).is_ok());
79 |         assert!(metadata_json.get("backups").is_none());
80 |         assert!(metadata_json.get("dumps").is_some());
81 |         assert!(metadata_json.get("dumps").unwrap().is_array());
82 |         assert!(metadata_json
83 |             .get("dumps")
84 |             .unwrap()
85 |             .as_array()
86 |             .unwrap()
87 |             .contains(&json!({
88 |                 "directory_name":"dump-1653170039392",
89 |                 "size":62279,
90 |                 "created_at":1234,
91 |                 "compressed":true,
92 |                 "encrypted":false
93 |             })));
94 |     }
95 | }
96 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/mysql.rs:
--------------------------------------------------------------------------------
  1 | use std::io::{Error, Write};
  2 | use std::process::{Command, Stdio};
  3 | 
  4 | use crate::connector::Connector;
  5 | use crate::destination::Destination;
  6 | use crate::types::Bytes;
  7 | use crate::utils::{binary_exists, wait_for_command};
  8 | 
  9 | pub struct Mysql<'a> {
 10 |     host: &'a str,
 11 |     port: u16,
 12 |     database: &'a str,
 13 |     username: &'a str,
 14 |     password: &'a str,
 15 | }
 16 | 
 17 | impl<'a> Mysql<'a> {
 18 |     pub fn new(
 19 |         host: &'a str,
 20 |         port: u16,
 21 |         database: &'a str,
 22 |         username: &'a str,
 23 |         password: &'a str,
 24 |     ) -> Self {
 25 |         Mysql {
 26 |             host,
 27 |             port,
 28 |             database,
 29 |             username,
 30 |             password,
 31 |         }
 32 |     }
 33 | }
 34 | 
 35 | impl<'a> Connector for Mysql<'a> {
 36 |     fn init(&mut self) -> Result<(), Error> {
 37 |         let _ = binary_exists("mysql")?;
 38 | 
 39 |         // test MySQL connection
 40 |         let mut process = Command::new("mysql")
 41 |             .args([
 42 |                 "-h",
 43 |                 self.host,
 44 |                 "-P",
 45 |                 self.port.to_string().as_str(),
 46 |                 "-u",
 47 |                 self.username,
 48 |                 &format!("-p{}", self.password),
 49 |                 "-e",
 50 |                 "SELECT 1;",
 51 |             ])
 52 |             .stdout(Stdio::piped())
 53 |             .spawn()?;
 54 | 
 55 |         wait_for_command(&mut process)
 56 |     }
 57 | }
 58 | 
 59 | impl<'a> Destination for Mysql<'a> {
 60 |     fn write(&self, data: Bytes) -> Result<(), Error> {
 61 |         let mut process = Command::new("mysql")
 62 |             .args([
 63 |                 "-h",
 64 |                 self.host,
 65 |                 "-P",
 66 |                 self.port.to_string().as_str(),
 67 |                 "-u",
 68 |                 self.username,
 69 |                 &format!("-p{}", self.password),
 70 |                 self.database,
 71 |             ])
 72 |             .stdin(Stdio::piped())
 73 |             .stdout(Stdio::null())
 74 |             .spawn()?;
 75 | 
 76 |         let _ = process.stdin.take().unwrap().write_all(data.as_slice());
 77 | 
 78 |         wait_for_command(&mut process)
 79 |     }
 80 | }
 81 | 
 82 | #[cfg(test)]
 83 | mod tests {
 84 |     use crate::connector::Connector;
 85 |     use crate::destination::mysql::Mysql;
 86 |     use crate::destination::Destination;
 87 | 
 88 |     fn get_mysql() -> Mysql<'static> {
 89 |         Mysql::new("127.0.0.1", 3306, "mysql", "root", "password")
 90 |     }
 91 | 
 92 |     fn get_invalid_mysql() -> Mysql<'static> {
 93 |         Mysql::new("127.0.0.1", 3306, "mysql", "root", "wrong_password")
 94 |     }
 95 | 
 96 |     #[test]
 97 |     fn connect() {
 98 |         let mut m = get_mysql();
 99 |         let _ = m.init().expect("can't init mysql");
100 |         assert!(m.write(b"SELECT 1;".to_vec()).is_ok());
101 | 
102 |         let mut m = get_invalid_mysql();
103 |         assert!(m.init().is_err());
104 |         assert!(m.write(b"SELECT 1".to_vec()).is_err());
105 |     }
106 | 
107 |     #[test]
108 |     fn test_inserts() {}
109 | }
110 | 


--------------------------------------------------------------------------------
/docs/DESIGN.md:
--------------------------------------------------------------------------------
 1 | ## How RepliByte works
 2 | 
 3 | RepliByte is built to replicate small and very large databases from one place (source) to the other (destination) with a bridge as
 4 | intermediary (bridge). Here is an example of what happens while replicating a PostgreSQL database.
 5 | 
 6 | ```mermaid
 7 | sequenceDiagram
 8 |     participant RepliByte
 9 |     participant PostgreSQL (Source)
10 |     participant AWS S3 (Bridge)
11 |     PostgreSQL (Source)->>RepliByte: 1. Dump data
12 |     loop
13 |         RepliByte->>RepliByte: 2. Subsetting (optional)
14 |         RepliByte->>RepliByte: 3. Hide or fake sensitive data (optional)
15 |         RepliByte->>RepliByte: 4. Compress data (optional)
16 |         RepliByte->>RepliByte: 5. Encrypt data (optional)
17 |     end
18 |     RepliByte->>AWS S3 (Bridge): 6. Upload obfuscated dump data
19 |     RepliByte->>AWS S3 (Bridge): 7. Write index file
20 | ```
21 | 
22 | 1. RepliByte connects to the _PostgreSQL Source_ database and makes a full SQL dump of it.
23 | 2. RepliByte receives the SQL dump, parse it, and generates random/fake information in real-time.
24 | 3. RepliByte streams and uploads the modified SQL dump in real-time on AWS S3.
25 | 4. RepliByte keeps track of the uploaded SQL dump by writing it into an index file.
26 | 
27 | ---
28 | 
29 | Once at least a replica from the source PostgreSQL database is available in the S3 bucket, RepliByte can use and inject it into the
30 | destination PostgreSQL database.
31 | 
32 | ```mermaid
33 | sequenceDiagram
34 |     participant RepliByte
35 |     participant PostgreSQL (Destination)
36 |     participant AWS S3 (Bridge)
37 |     AWS S3 (Bridge)->>RepliByte: 1. Read index file
38 |     AWS S3 (Bridge)->>RepliByte: 2. Download dump SQL file
39 |     loop
40 |         RepliByte->>RepliByte: 3. Decrypt data (if required)
41 |         RepliByte->>RepliByte: 4. Uncompress data (if required)
42 |     end
43 |     RepliByte->>PostgreSQL (Destination): 5. Restore dump SQL
44 | ```
45 | 
46 | 1. RepliByte connects to the S3 bucket and reads the index file to retrieve the latest SQL to download.
47 | 2. RepliByte downloads the SQL dump in a stream bytes.
48 | 3. RepliByte restores the SQL dump in the destination PostgreSQL database in real-time.
49 | 
50 | ## Design
51 | 
52 | ### Low Memory and CPU footprint
53 | 
54 | Written in Rust, RepliByte can run with 512 MB of RAM and 1 CPU to replicate 1 TB of data (we are working on a benchmark). RepliByte
55 | replicate the data in a stream of bytes and does not store anything on a local disk.
56 | 
57 | ### Limitations
58 | 
59 | - Tested with PostgreSQL 13 and 14. It should work with prior versions.
60 | - RepliByte as not been designed to run multiple dumps targeting the same Bridge. The Index File does not manage concurrent write (ATM).
61 | 
62 | ### Index file structure
63 | 
64 | An index file describe the structure of your dumps and all of them.
65 | 
66 | Here is the manifest file that you can find at the root of your target `Bridge` (E.g: S3).
67 | 
68 | ```json
69 | {
70 |   "dumps": [
71 |     {
72 |       "size": 1024000,
73 |       "directory_name": "dump-{epoch timestamp}",
74 |       "created_at": "epoch timestamp",
75 |       "compressed": true,
76 |       "encrypted": true
77 |     }
78 |   ]
79 | }
80 | ```
81 | 
82 | - _size_ is in bytes
83 | - _created_at_ is an epoch timestamp in millis
84 | 


--------------------------------------------------------------------------------
/website/docs/advanced-guides/web-assembly-transformer.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 3
 3 | ---
 4 | 
 5 | # Web Assembly transformer
 6 | 
 7 | - This folder contains an example of a wasm (WebAssembly) transformer which reads the column value (in this case, a string) input from stdin, transforms it (in this case, reverses it), and then writes the result to stdout.
 8 | - The file `wasm-transformer-reverse-string.wasm` was originally written in rust, and compiled to a `wasm32-wasi` target binary.
 9 | - A great way to implement your own custom wasm transformer would be to write it in a [language which supports WebAssembly in a WASI environment](https://www.fermyon.com/wasm-languages/webassembly-language-support) and then compile it to a `.wasm` file.
10 | - In the following section, we will demonstrate how to implement a custom wasm transformer by using Rust (to understand how to do this with other languages, we suggest reading more about [`wasm`](https://developer.mozilla.org/en-US/docs/WebAssembly) and [`WASI`](https://wasi.dev/)).
11 | 
12 | ## How it works
13 | 
14 | RepliByte's communication with external `wasm` modules is implemented with the use of pipes:
15 | 1. The column value which needs to be transformed is written to stdin by RepliByte. This will always be a single column value.
16 | 2. The wasm module should read the value from stdin and **transform** it (this is where your custom implementation comes in).
17 | 3. The wasm module should write the transformed value to stdout.
18 | 4. RepliByte reads the transformed value from stdout. RepliByte will expect to read a single column value, anything else will cause a runtime error.
19 | 
20 | As long as you start with reading from stdin and end with printing to stdout, you can go as crazy as you want with the implementation of your custom transformers.
21 | 
22 | ## Implementing a custom transformer with Rust
23 | 
24 | First, start a new cargo project:
25 | 
26 | ```shell
27 | cargo init my-custom-wasm-transformer
28 | ```
29 | 
30 | Go to `src/main.rs` in the newly created project and write some code:
31 | 
32 | ```rust
33 | // This is actually the source of the `.wasm` file in this example. Feel free to edit it !
34 | fn main() {
35 |     // Read input value from stdin
36 |     let mut input = String::new();
37 |     std::io::stdin().read_line(&mut input).unwrap();
38 |     
39 |     // Transform the value as you see fit (in this case we just reverse the string)
40 |     let output: String = input.chars().rev().collect();
41 |     
42 |     // Write transformed value to stdout (simply print)
43 |     println!("{}", output);
44 | }
45 | ```
46 | 
47 | Add `wasm32-wasi` to your targets:
48 | 
49 | ```shell
50 | rustup target add wasm32-wasi
51 | ```
52 | 
53 | Build:
54 | 
55 | ```shell
56 | cargo build --release --target wasm32-wasi
57 | ```
58 | 
59 | You will find your freshly built custom wasm transformer here:
60 | 
61 | `target/wasm32-wasi/release/my-custom-wasm-transformer.wasm`
62 | 
63 | The only thing that's left is to edit the `path` option in `replibyte.yaml`:
64 | 
65 |  ```yaml
66 | # ...
67 |     - database: <your-db-name>
68 |       table: <your-table-name>
69 |       columns:
70 |         - name: <your-column-name>
71 |           transformer_name: custom-wasm
72 |           transformer_options:
73 |             path: "path/to/your/custom-wasm-transformer.wasm"
74 | # ...
75 |  ```
76 | 
77 | That's it!
78 | 
79 | 


--------------------------------------------------------------------------------
/website/docs/guides/2-restore-a-dump.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: II. Restore a dump
 3 | sidebar_position: 2
 4 | ---
 5 | 
 6 | # Restore a dump
 7 | 
 8 | :::note
 9 | 
10 | I assume you did the previous guide, and you have your final `conf.yaml` file
11 | 
12 | :::
13 | 
14 | On the last step, we have created and uploaded our transformed dump in our S3 Datastore. Now, we are ready to restore it in a development database. 
15 | 
16 | :::note
17 | 
18 | The database where you restore must be the same type of the source where you dump. If you created a PostgreSQL dump, then you must restore on a PostgreSQL database.
19 | 
20 | :::
21 | 
22 | Replibyte provides you two options to restore a dump:
23 | 
24 | * **Option 1**: Locally - which is convenient for local development
25 |   * Example use cases:
26 |     * You develop an app locally and wants to work with real data.
27 |     * You want to inspect what the transformed dump looks like.
28 | * **Option 2**: Remote - which is convenient to restore a remote database.
29 |   * Example use cases:
30 |     * You have a dump on your local machine, and you want to restore a database only accessible from a specific network.
31 |     * You have no access to the dumps, only an admin can restore them.
32 | 
33 | ## Option 1: Locally
34 | 
35 | ### With Docker
36 | 
37 | :::caution
38 | 
39 | [Docker](https://www.docker.com/) must be installed and running
40 | 
41 | :::
42 | 
43 | It's the best option to develop locally with a consistent transformed dump coming from your production data. Execute the following command to restore in a local Docker instance the latest dump:
44 | 
45 | ```shell
46 | replibyte -c conf.yaml dump restore local -d postgresql -v latest
47 | ```
48 | 
49 | `-d` parameter accepts `mongodb`, `mysql` and other databases supported by Replibyte.
50 | 
51 | You can also list the available dumps with:
52 | 
53 | ```shell
54 | replibyte -c conf.yaml dump list
55 | 
56 | type          name                  size    when                    compressed  encrypted
57 | PostgreSQL    dump-1647706359405    154MB   Yesterday at 03:00 am   true        true
58 | PostgreSQL    dump-1647731334517    152MB   2 days ago at 03:00 am  true        true
59 | PostgreSQL    dump-1647734369306    149MB   3 days ago at 03:00 am  true        true
60 | ```
61 | 
62 | And restore the dump you want with:
63 | 
64 | ```shell
65 | replibyte -c conf.yaml dump restore local -d postgres -v dump-1647731334517
66 | ```
67 | 
68 | ### In a file
69 | 
70 | You might want to inspect what you have in your dump, and restore it manually, you can execute the same restore command but with the `-o` parameter:
71 | 
72 | ```shell
73 | replibyte -c conf.yaml dump restore local -i postgres -v latest -o > dump.sql
74 | ```
75 | 
76 | ## Option 2: Remote
77 | 
78 | To restore on a remote database, you need to specify the destination connection URI in your `conf.yaml`:
79 | 
80 | ```yaml title="conf.yaml"
81 | destination:
82 |   connection_uri: postgres://user:password@host:port/db
83 |   # Disable public's schema wipe
84 |   # wipe_database: false (default: true)
85 | ```
86 | 
87 | and run the following command:
88 | 
89 | ```shell
90 | replibyte -c conf.yaml dump restore remote -v latest
91 | ```
92 | 
93 | ---
94 | 
95 | You know now how to restore your transformed dump via multiple options, and even choose which version you want to restore. 
96 | 
97 | But now, **what happen if your database is very large?** In the next guide, you will learn how to downscale your database from a large size to a more reasonable one, while keeping it consistent. ➡️
98 | 


--------------------------------------------------------------------------------
/docker-compose-dev.yml:
--------------------------------------------------------------------------------
  1 | version: '3'
  2 | 
  3 | services:
  4 |   source-postgres:
  5 |     image: postgres:13
  6 |     restart: always
  7 |     healthcheck:
  8 |       test: ['CMD', 'pg_isready', '-q', '-d', 'postgres', '-U', 'root']
  9 |       timeout: 45s
 10 |       interval: 10s
 11 |       retries: 10
 12 |     environment:
 13 |       - POSTGRES_USER=root
 14 |       - POSTGRES_PASSWORD=password
 15 |     volumes:
 16 |       - ./db/postgres:/docker-entrypoint-initdb.d/
 17 |     ports:
 18 |       - 5432:5432
 19 |   dest-postgres:
 20 |     image: postgres:13
 21 |     restart: always
 22 |     healthcheck:
 23 |       test: ['CMD', 'pg_isready', '-q', '-d', 'postgres', '-U', 'root']
 24 |       timeout: 45s
 25 |       interval: 10s
 26 |       retries: 10
 27 |     environment:
 28 |       - POSTGRES_USER=root
 29 |       - POSTGRES_PASSWORD=password
 30 |     ports:
 31 |       - 5453:5432
 32 |   source-mysql:
 33 |     image: mysql:8
 34 |     restart: always
 35 |     command: --default-authentication-plugin=mysql_native_password
 36 |     healthcheck:
 37 |       test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password']
 38 |       timeout: 45s
 39 |       interval: 10s
 40 |       retries: 10
 41 |     environment:
 42 |       - MYSQL_ROOT_PASSWORD=password
 43 |     volumes:
 44 |       - ./db/mysql:/docker-entrypoint-initdb.d
 45 |     ports:
 46 |       - 3306:3306
 47 |   dest-mysql:
 48 |     image: mysql:8
 49 |     restart: always
 50 |     command: --default-authentication-plugin=mysql_native_password
 51 |     healthcheck:
 52 |       test: ['CMD', 'mysqladmin', 'ping', '-u', 'root', '--password=password']
 53 |       timeout: 45s
 54 |       interval: 10s
 55 |       retries: 10
 56 |     environment:
 57 |       - MYSQL_ROOT_PASSWORD=password
 58 |     ports:
 59 |       - 3307:3306
 60 |   source-mongodb:
 61 |     image: mongo:5
 62 |     restart: always
 63 |     healthcheck:
 64 |       test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet
 65 |       timeout: 45s
 66 |       interval: 10s
 67 |       retries: 10
 68 |     environment:
 69 |       - MONGO_INITDB_ROOT_USERNAME=root
 70 |       - MONGO_INITDB_ROOT_PASSWORD=password
 71 |     volumes:
 72 |       - ./db/mongodb:/docker-entrypoint-initdb.d/
 73 |     ports:
 74 |       - 27017:27017
 75 |   dest-mongodb:
 76 |     image: mongo:5
 77 |     restart: always
 78 |     healthcheck:
 79 |       test: echo 'db.runCommand("ping").ok' | mongo localhost:27017/test --quiet
 80 |       timeout: 45s
 81 |       interval: 10s
 82 |       retries: 10
 83 |     environment:
 84 |       - MONGO_INITDB_ROOT_USERNAME=root
 85 |       - MONGO_INITDB_ROOT_PASSWORD=password
 86 |     ports:
 87 |       - 27018:27017
 88 |   bridge-minio:
 89 |     image: minio/minio:RELEASE.2022-03-17T06-34-49Z
 90 |     restart: always
 91 |     command: server --console-address ":9001" /data/minio/
 92 |     healthcheck:
 93 |       test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live']
 94 |       interval: 30s
 95 |       timeout: 20s
 96 |       retries: 3
 97 |     environment:
 98 |       MINIO_ROOT_USER: minioadmin
 99 |       MINIO_ROOT_PASSWORD: minioadmin
100 |     ports:
101 |       - 9000:9000
102 |       - 9001:9001
103 |   create-minio-bucket:
104 |     image: minio/mc
105 |     depends_on:
106 |       - bridge-minio
107 |     entrypoint: >
108 |       /bin/sh -c "
109 |       /usr/bin/mc config host add myminio http://bridge-minio:9000 minioadmin minioadmin;
110 |       /usr/bin/mc rm -r --force myminio/replibyte-test;
111 |       /usr/bin/mc mb myminio/replibyte-test;
112 |       /usr/bin/mc policy download myminio/replibyte-test;
113 |       exit 0;
114 |       "
115 | 


--------------------------------------------------------------------------------
/subset/src/lib.rs:
--------------------------------------------------------------------------------
  1 | use std::collections::HashSet;
  2 | use std::io::Error;
  3 | 
  4 | mod dedup;
  5 | pub mod postgres;
  6 | mod utils;
  7 | 
  8 | pub type Bytes = Vec<u8>;
  9 | 
 10 | pub trait Subset {
 11 |     fn read<F: FnMut(String), P: FnMut(Progress)>(&self, data: F, progress: P)
 12 |         -> Result<(), Error>;
 13 | }
 14 | 
 15 | pub struct Progress {
 16 |     // total data rows
 17 |     pub total_rows: usize,
 18 |     // total rows to processed
 19 |     pub total_rows_to_process: usize,
 20 |     // rows processed
 21 |     pub processed_rows: usize,
 22 |     // last row processed exec time
 23 |     pub last_process_time: u128,
 24 | }
 25 | 
 26 | impl Progress {
 27 |     pub fn percent(&self) -> u8 {
 28 |         ((self.processed_rows as f64 / self.total_rows_to_process as f64) * 100.0) as u8
 29 |     }
 30 | }
 31 | 
 32 | #[derive(Debug, Hash, Eq, PartialEq)]
 33 | pub struct PassthroughTable<'a> {
 34 |     pub database: &'a str,
 35 |     pub table: &'a str,
 36 | }
 37 | 
 38 | impl<'a> PassthroughTable<'a> {
 39 |     pub fn new<S: Into<&'a str>>(database: S, table: S) -> Self {
 40 |         PassthroughTable {
 41 |             database: database.into(),
 42 |             table: table.into(),
 43 |         }
 44 |     }
 45 | }
 46 | 
 47 | pub struct SubsetOptions<'a> {
 48 |     pub passthrough_tables: &'a HashSet<PassthroughTable<'a>>,
 49 | }
 50 | 
 51 | impl<'a> SubsetOptions<'a> {
 52 |     pub fn new(passthrough_tables: &'a HashSet<PassthroughTable<'a>>) -> Self {
 53 |         SubsetOptions { passthrough_tables }
 54 |     }
 55 | }
 56 | 
 57 | #[derive(Debug, Hash, Eq, PartialEq, Clone)]
 58 | pub struct SubsetTable {
 59 |     pub database: String,
 60 |     pub table: String,
 61 |     pub relations: Vec<SubsetTableRelation>,
 62 | }
 63 | 
 64 | impl SubsetTable {
 65 |     pub fn new<S: Into<String>>(
 66 |         database: S,
 67 |         table: S,
 68 |         relations: Vec<SubsetTableRelation>,
 69 |     ) -> Self {
 70 |         SubsetTable {
 71 |             database: database.into(),
 72 |             table: table.into(),
 73 |             relations,
 74 |         }
 75 |     }
 76 | 
 77 |     pub fn related_tables(&self) -> HashSet<&str> {
 78 |         self.relations
 79 |             .iter()
 80 |             .map(|r| r.table.as_str())
 81 |             .collect::<HashSet<_>>()
 82 |     }
 83 | 
 84 |     pub fn find_related_subset_tables<'a>(
 85 |         &self,
 86 |         subset_tables: &'a Vec<&SubsetTable>,
 87 |     ) -> Vec<&'a SubsetTable> {
 88 |         if subset_tables.is_empty() {
 89 |             return Vec::new();
 90 |         }
 91 | 
 92 |         let related_tables = self.related_tables();
 93 | 
 94 |         subset_tables
 95 |             .iter()
 96 |             .filter(|subset_table| related_tables.contains(subset_table.table.as_str()))
 97 |             .map(|subset_table| *subset_table)
 98 |             .collect::<Vec<_>>()
 99 |     }
100 | }
101 | 
102 | /// Representing a query where...
103 | /// database -> is the targeted database
104 | /// table -> is the targeted table
105 | /// from_property is the parent table property referencing the target table `to_property`
106 | #[derive(Debug, Hash, Eq, PartialEq, Clone)]
107 | pub struct SubsetTableRelation {
108 |     pub database: String,
109 |     pub table: String,
110 |     pub from_property: String,
111 |     pub to_property: String,
112 | }
113 | 
114 | impl SubsetTableRelation {
115 |     pub fn new<S: Into<String>>(database: S, table: S, from_property: S, to_property: S) -> Self {
116 |         SubsetTableRelation {
117 |             database: database.into(),
118 |             table: table.into(),
119 |             from_property: from_property.into(),
120 |             to_property: to_property.into(),
121 |         }
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/mysql_docker.rs:
--------------------------------------------------------------------------------
  1 | use crate::connector::Connector;
  2 | use crate::destination::docker::{
  3 |     daemon_is_running, Container, ContainerOptions, Image, DOCKER_BINARY_NAME,
  4 | };
  5 | use crate::destination::Destination;
  6 | use crate::types::Bytes;
  7 | use crate::utils::binary_exists;
  8 | use std::io::{Error, ErrorKind, Write};
  9 | 
 10 | const DEFAULT_MYSQL_IMAGE: &str = "mysql";
 11 | pub const DEFAULT_MYSQL_IMAGE_TAG: &str = "8";
 12 | pub const DEFAULT_MYSQL_CONTAINER_PORT: u16 = 3306;
 13 | const DEFAULT_MYSQL_PASSWORD: &str = "password";
 14 | 
 15 | pub struct MysqlDocker {
 16 |     pub image: Image,
 17 |     pub options: ContainerOptions,
 18 |     pub container: Option<Container>,
 19 | }
 20 | 
 21 | impl MysqlDocker {
 22 |     pub fn new(tag: String, port: u16) -> Self {
 23 |         Self {
 24 |             image: Image {
 25 |                 name: DEFAULT_MYSQL_IMAGE.to_string(),
 26 |                 tag,
 27 |             },
 28 |             options: ContainerOptions {
 29 |                 host_port: port,
 30 |                 container_port: DEFAULT_MYSQL_CONTAINER_PORT,
 31 |             },
 32 |             container: None,
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | impl Connector for MysqlDocker {
 38 |     fn init(&mut self) -> Result<(), Error> {
 39 |         let _ = binary_exists(DOCKER_BINARY_NAME)?;
 40 |         let _ = daemon_is_running()?;
 41 | 
 42 |         let password_env = format!("MYSQL_ROOT_PASSWORD={}", DEFAULT_MYSQL_PASSWORD);
 43 |         let container = Container::new(
 44 |             &self.image,
 45 |             &self.options,
 46 |             vec!["-e", password_env.as_str()],
 47 |             Some(vec![
 48 |                 "mysqld",
 49 |                 "--default-authentication-plugin=mysql_native_password",
 50 |             ]),
 51 |         )?;
 52 | 
 53 |         self.container = Some(container);
 54 |         Ok(())
 55 |     }
 56 | }
 57 | 
 58 | impl Destination for MysqlDocker {
 59 |     fn write(&self, data: Bytes) -> Result<(), Error> {
 60 |         match &self.container {
 61 |             Some(container) => {
 62 |                 let mut container_exec =
 63 |                     container.exec("exec mysql -uroot -p\"$MYSQL_ROOT_PASSWORD\"")?;
 64 |                 let _ = container_exec
 65 |                     .stdin
 66 |                     .take()
 67 |                     .unwrap()
 68 |                     .write_all(data.as_slice());
 69 | 
 70 |                 let exit_status = container_exec.wait()?;
 71 |                 if !exit_status.success() {
 72 |                     return Err(Error::new(
 73 |                         ErrorKind::Other,
 74 |                         format!("command error: {:?}", exit_status.to_string()),
 75 |                     ));
 76 |                 }
 77 | 
 78 |                 Ok(())
 79 |             }
 80 |             None => Err(Error::new(
 81 |                 ErrorKind::Other,
 82 |                 "command error: cannot retrieve container",
 83 |             )),
 84 |         }
 85 |     }
 86 | }
 87 | 
 88 | #[cfg(test)]
 89 | mod tests {
 90 |     use super::MysqlDocker;
 91 |     use crate::connector::Connector;
 92 |     use crate::destination::Destination;
 93 | 
 94 |     fn get_mysql() -> MysqlDocker {
 95 |         MysqlDocker::new("8".to_string(), 3308)
 96 |     }
 97 | 
 98 |     fn get_invalid_mysql() -> MysqlDocker {
 99 |         MysqlDocker::new("bad_tag".to_string(), 3308)
100 |     }
101 | 
102 |     #[test]
103 |     fn connect() {
104 |         let mut p = get_mysql();
105 |         let _ = p.init().expect("can't init mysql");
106 |         assert!(p.write(b"SELECT 1".to_vec()).is_ok());
107 | 
108 |         // cleanup container
109 |         let _ = p.container.unwrap().rm();
110 | 
111 |         let mut p = get_invalid_mysql();
112 |         assert!(p.init().is_err());
113 |         assert!(p.write(b"SELECT 1".to_vec()).is_err());
114 |     }
115 | }
116 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/mongodb.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{Error, Write};
 2 | use std::process::{Command, Stdio};
 3 | 
 4 | use crate::connector::Connector;
 5 | use crate::destination::Destination;
 6 | use crate::types::Bytes;
 7 | use crate::utils::{binary_exists, wait_for_command};
 8 | 
 9 | pub struct MongoDB<'a> {
10 |     uri: &'a str,
11 |     database: &'a str,
12 | }
13 | 
14 | impl<'a> MongoDB<'a> {
15 |     pub fn new(uri: &'a str, database: &'a str) -> Self {
16 |         MongoDB { uri, database }
17 |     }
18 | }
19 | 
20 | impl<'a> Connector for MongoDB<'a> {
21 |     fn init(&mut self) -> Result<(), Error> {
22 |         let _ = binary_exists("mongosh")?;
23 |         let _ = binary_exists("mongorestore")?;
24 |         let _ = check_connection_status(self)?;
25 | 
26 |         Ok(())
27 |     }
28 | }
29 | 
30 | impl<'a> Destination for MongoDB<'a> {
31 |     fn write(&self, data: Bytes) -> Result<(), Error> {
32 |         let mut process = Command::new("mongorestore")
33 |             .args([
34 |                 "--uri",
35 |                 self.uri,
36 |                 format!("--nsFrom='{}.*'", self.database).as_str(),
37 |                 format!("--nsTo='{}.*'", self.database).as_str(),
38 |                 "--archive",
39 |             ])
40 |             .stdin(Stdio::piped())
41 |             .stdout(Stdio::null())
42 |             .spawn()?;
43 | 
44 |         let _ = process
45 |             .stdin
46 |             .take()
47 |             .unwrap()
48 |             .write_all(&data[..data.len() - 1]); // remove trailing null terminator, or else mongorestore will fail
49 | 
50 |         wait_for_command(&mut process)
51 |     }
52 | }
53 | 
54 | fn check_connection_status(db: &MongoDB) -> Result<(), Error> {
55 |     let mut echo_process = Command::new("echo")
56 |         .arg(r#"'db.runCommand("ping").ok'"#)
57 |         .stdout(Stdio::piped())
58 |         .spawn()?;
59 | 
60 |     let mut mongo_process = Command::new("mongosh")
61 |         .args([db.uri, "--quiet"])
62 |         .stdin(echo_process.stdout.take().unwrap())
63 |         .stdout(Stdio::inherit())
64 |         .spawn()?;
65 | 
66 |     wait_for_command(&mut mongo_process)
67 | }
68 | 
69 | #[cfg(test)]
70 | mod tests {
71 |     use dump_parser::utils::decode_hex;
72 | 
73 |     use crate::connector::Connector;
74 |     use crate::destination::mongodb::MongoDB;
75 |     use crate::destination::Destination;
76 | 
77 |     fn get_mongodb() -> MongoDB<'static> {
78 |         MongoDB::new("mongodb://root:password@localhost:27018", "test")
79 |     }
80 | 
81 |     fn get_invalid_mongodb() -> MongoDB<'static> {
82 |         MongoDB::new("mongodb://root:wrongpassword@localhost:27018", "test")
83 |     }
84 | 
85 |     #[test]
86 |     fn connect() {
87 |         let mut p = get_mongodb();
88 |         let _ = p.init().expect("can't init mongodb");
89 |         let bytes = decode_hex("6de299816600000010636f6e63757272656e745f636f6c6c656374696f6e7300040000000276657273696f6e0004000000302e3100027365727665725f76657273696f6e0006000000352e302e360002746f6f6c5f76657273696f6e00080000003130302e352e320000020100000264620005000000746573740002636f6c6c656374696f6e0006000000757365727300026d6574616461746100ad0000007b22696e6465786573223a5b7b2276223a7b22246e756d626572496e74223a2232227d2c226b6579223a7b225f6964223a7b22246e756d626572496e74223a2231227d7d2c226e616d65223a225f69645f227d5d2c2275756964223a223464363734323637316333613463663938316439386164373831343735333234222c22636f6c6c656374696f6e4e616d65223a227573657273222c2274797065223a22636f6c6c656374696f6e227d001073697a6500000000000274797065000b000000636f6c6c656374696f6e0000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600001243524300000000000000000000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600011243524300000000000000000000ffffffff00").unwrap();
90 |         assert!(p.write(bytes.to_vec()).is_ok());
91 | 
92 |         let mut p = get_invalid_mongodb();
93 |         assert!(p.init().is_err());
94 |         assert!(p.write(bytes.to_vec()).is_err());
95 |     }
96 |     //TODO add more tests
97 | }
98 | 


--------------------------------------------------------------------------------
/replibyte/src/tasks/full_dump.rs:
--------------------------------------------------------------------------------
  1 | use std::io::{Error, ErrorKind};
  2 | use std::sync::mpsc;
  3 | use std::thread;
  4 | 
  5 | use crate::datastore::Datastore;
  6 | use crate::source::SourceOptions;
  7 | use crate::tasks::{MaxBytes, Message, Task, TransferredBytes};
  8 | use crate::types::{to_bytes, Queries};
  9 | use crate::Source;
 10 | 
 11 | type DataMessage = (u16, Queries);
 12 | 
 13 | /// FullDumpTask is a wrapping struct to execute the synchronization between a *Source* and a *Datastore*
 14 | pub struct FullDumpTask<'a, S>
 15 | where
 16 |     S: Source,
 17 | {
 18 |     source: S,
 19 |     datastore: Box<dyn Datastore>,
 20 |     options: SourceOptions<'a>,
 21 | }
 22 | 
 23 | impl<'a, S> FullDumpTask<'a, S>
 24 | where
 25 |     S: Source,
 26 | {
 27 |     pub fn new(source: S, datastore: Box<dyn Datastore>, options: SourceOptions<'a>) -> Self {
 28 |         FullDumpTask {
 29 |             source,
 30 |             datastore,
 31 |             options,
 32 |         }
 33 |     }
 34 | }
 35 | 
 36 | impl<'a, S> Task for FullDumpTask<'a, S>
 37 | where
 38 |     S: Source,
 39 | {
 40 |     fn run<F: FnMut(TransferredBytes, MaxBytes)>(
 41 |         mut self,
 42 |         mut progress_callback: F,
 43 |     ) -> Result<(), Error> {
 44 |         // initialize the source
 45 |         let _ = self.source.init()?;
 46 | 
 47 |         let (tx, rx) = mpsc::sync_channel::<Message<DataMessage>>(1);
 48 |         let datastore = self.datastore;
 49 | 
 50 |         let join_handle = thread::spawn(move || -> Result<(), Error> {
 51 |             // managing Datastore (S3) upload here
 52 |             let datastore = datastore;
 53 | 
 54 |             loop {
 55 |                 let result = match rx.recv() {
 56 |                     Ok(Message::Data((chunk_part, queries))) => Ok((chunk_part, queries)),
 57 |                     Ok(Message::EOF) => break,
 58 |                     Err(err) => Err(Error::new(ErrorKind::Other, format!("{}", err))),
 59 |                 };
 60 | 
 61 |                 if let Ok((chunk_part, queries)) = result {
 62 |                     let _ = match datastore.write(chunk_part, to_bytes(queries)) {
 63 |                         Ok(_) => {}
 64 |                         Err(err) => return Err(Error::new(ErrorKind::Other, format!("{}", err))),
 65 |                     };
 66 |                 }
 67 |             }
 68 | 
 69 |             Ok(())
 70 |         });
 71 | 
 72 |         // buffer of 100MB in memory to use and re-use to upload data into datastore
 73 |         let buffer_size = 100 * 1024 * 1024;
 74 |         let mut queries = vec![];
 75 |         let mut consumed_buffer_size = 0usize;
 76 |         let mut total_transferred_bytes = 0usize;
 77 |         let mut chunk_part = 0u16;
 78 | 
 79 |         // init progress
 80 |         progress_callback(
 81 |             total_transferred_bytes,
 82 |             buffer_size * (chunk_part as usize + 1),
 83 |         );
 84 | 
 85 |         let _ = self.source.read(self.options, |_original_query, query| {
 86 |             if consumed_buffer_size + query.data().len() > buffer_size {
 87 |                 chunk_part += 1;
 88 |                 consumed_buffer_size = 0;
 89 |                 // TODO .clone() - look if we do not consume more mem
 90 | 
 91 |                 let message = Message::Data((chunk_part, queries.clone()));
 92 | 
 93 |                 let _ = tx.send(message); // FIXME catch SendError?
 94 |                 let _ = queries.clear();
 95 |             }
 96 | 
 97 |             consumed_buffer_size += query.data().len();
 98 |             total_transferred_bytes += query.data().len();
 99 |             progress_callback(
100 |                 total_transferred_bytes,
101 |                 buffer_size * (chunk_part as usize + 1),
102 |             );
103 |             queries.push(query);
104 |         })?;
105 | 
106 |         progress_callback(total_transferred_bytes, total_transferred_bytes);
107 | 
108 |         chunk_part += 1;
109 |         let _ = tx.send(Message::Data((chunk_part, queries)));
110 |         let _ = tx.send(Message::EOF);
111 |         // wait for end of upload execution
112 |         join_handle.join().unwrap()?;
113 | 
114 |         Ok(())
115 |     }
116 | }
117 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/postgres_docker.rs:
--------------------------------------------------------------------------------
  1 | use crate::connector::Connector;
  2 | use crate::destination::docker::{
  3 |     daemon_is_running, Container, ContainerOptions, Image, DOCKER_BINARY_NAME,
  4 | };
  5 | use crate::destination::Destination;
  6 | use crate::types::Bytes;
  7 | use crate::utils::binary_exists;
  8 | use std::io::{Error, ErrorKind, Write};
  9 | 
 10 | const DEFAULT_POSTGRES_IMAGE: &str = "postgres";
 11 | pub const DEFAULT_POSTGRES_IMAGE_TAG: &str = "13";
 12 | pub const DEFAULT_POSTGRES_CONTAINER_PORT: u16 = 5432;
 13 | pub const DEFAULT_POSTGRES_USER: &str = "postgres";
 14 | pub const DEFAULT_POSTGRES_PASSWORD: &str = "password";
 15 | pub const DEFAULT_POSTGRES_DB: &str = "postgres";
 16 | 
 17 | pub struct PostgresDocker {
 18 |     pub image: Image,
 19 |     pub options: ContainerOptions,
 20 |     pub container: Option<Container>,
 21 | }
 22 | 
 23 | impl PostgresDocker {
 24 |     pub fn new(tag: String, port: u16) -> Self {
 25 |         Self {
 26 |             image: Image {
 27 |                 name: DEFAULT_POSTGRES_IMAGE.to_string(),
 28 |                 tag,
 29 |             },
 30 |             options: ContainerOptions {
 31 |                 host_port: port,
 32 |                 container_port: DEFAULT_POSTGRES_CONTAINER_PORT,
 33 |             },
 34 |             container: None,
 35 |         }
 36 |     }
 37 | }
 38 | 
 39 | impl Connector for PostgresDocker {
 40 |     fn init(&mut self) -> Result<(), Error> {
 41 |         let _ = binary_exists(DOCKER_BINARY_NAME)?;
 42 |         let _ = daemon_is_running()?;
 43 | 
 44 |         let password_env = format!("POSTGRES_PASSWORD={}", DEFAULT_POSTGRES_PASSWORD);
 45 |         let user_env = format!("POSTGRES_USER={}", DEFAULT_POSTGRES_USER);
 46 |         let container = Container::new(
 47 |             &self.image,
 48 |             &self.options,
 49 |             vec!["-e", password_env.as_str(), "-e", user_env.as_str()],
 50 |             None,
 51 |         )?;
 52 | 
 53 |         self.container = Some(container);
 54 |         Ok(())
 55 |     }
 56 | }
 57 | 
 58 | impl Destination for PostgresDocker {
 59 |     fn write(&self, data: Bytes) -> Result<(), Error> {
 60 |         let cmd = format!(
 61 |             "PGPASSWORD={} psql --username {} {}",
 62 |             DEFAULT_POSTGRES_PASSWORD, DEFAULT_POSTGRES_USER, DEFAULT_POSTGRES_DB
 63 |         );
 64 | 
 65 |         match &self.container {
 66 |             Some(container) => {
 67 |                 let mut container_exec = container.exec(&cmd)?;
 68 |                 let _ = container_exec
 69 |                     .stdin
 70 |                     .take()
 71 |                     .unwrap()
 72 |                     .write_all(data.as_slice());
 73 | 
 74 |                 let exit_status = container_exec.wait()?;
 75 |                 if !exit_status.success() {
 76 |                     return Err(Error::new(
 77 |                         ErrorKind::Other,
 78 |                         format!("command error: {:?}", exit_status.to_string()),
 79 |                     ));
 80 |                 }
 81 | 
 82 |                 Ok(())
 83 |             }
 84 |             None => Err(Error::new(
 85 |                 ErrorKind::Other,
 86 |                 "command error: cannot retrieve container",
 87 |             )),
 88 |         }
 89 |     }
 90 | }
 91 | 
 92 | #[cfg(test)]
 93 | mod tests {
 94 |     use super::PostgresDocker;
 95 |     use crate::connector::Connector;
 96 |     use crate::destination::Destination;
 97 | 
 98 |     fn get_postgres() -> PostgresDocker {
 99 |         PostgresDocker::new("13".to_string(), 5454)
100 |     }
101 | 
102 |     fn get_invalid_postgres() -> PostgresDocker {
103 |         PostgresDocker::new("bad_tag".to_string(), 5454)
104 |     }
105 | 
106 |     #[test]
107 |     fn connect() {
108 |         let mut p = get_postgres();
109 |         let _ = p.init().expect("can't init postgres");
110 |         assert!(p.write(b"SELECT 1".to_vec()).is_ok());
111 | 
112 |         // cleanup container
113 |         let _ = p.container.unwrap().rm();
114 | 
115 |         let mut p = get_invalid_postgres();
116 |         assert!(p.init().is_err());
117 |         assert!(p.write(b"SELECT 1".to_vec()).is_err());
118 |     }
119 | }
120 | 


--------------------------------------------------------------------------------
/website/docusaurus.config.js:
--------------------------------------------------------------------------------
  1 | // @ts-check
  2 | // Note: type annotations allow type checking and IDEs autocompletion
  3 | 
  4 | const lightCodeTheme = require('prism-react-renderer/themes/github');
  5 | const darkCodeTheme = require('prism-react-renderer/themes/dracula');
  6 | 
  7 | /** @type {import('@docusaurus/types').Config} */
  8 | const config = {
  9 |   title: 'Replibyte',
 10 |   tagline: 'Seed your dev database with real data',
 11 |   url: 'https://www.replibyte.com',
 12 |   baseUrl: '/',
 13 |   onBrokenLinks: 'throw',
 14 |   onBrokenMarkdownLinks: 'warn',
 15 |   favicon: 'img/favicon.ico',
 16 |   organizationName: 'Qovery', // Usually your GitHub org/user name.
 17 |   projectName: 'replibyte', // Usually your repo name.
 18 | 
 19 |   plugins: [require.resolve("@cmfcmf/docusaurus-search-local")],
 20 |   presets: [
 21 |     [
 22 |       '@docusaurus/preset-classic',
 23 |       /** @type {import('@docusaurus/preset-classic').Options} */
 24 |       ({
 25 |         docs: {
 26 |           sidebarPath: require.resolve('./sidebars.js'),
 27 |           editUrl: 'https://github.com/Qovery/replibyte/tree/main/website/',
 28 |           remarkPlugins: [require('mdx-mermaid')],
 29 |         },
 30 |         blog: {
 31 |           showReadingTime: true,
 32 |           editUrl:
 33 |             'https://github.com/Qovery/replibyte/tree/main/website/',
 34 |         },
 35 |         theme: {
 36 |           customCss: require.resolve('./src/css/custom.css'),
 37 |         },
 38 |       }),
 39 |     ],
 40 |   ],
 41 | 
 42 |   themeConfig:
 43 |   /** @type {import('@docusaurus/preset-classic').ThemeConfig} */
 44 |     ({
 45 |       metadata: [{
 46 |         name: 'keywords', content: 'seed database, postgresql, postgres, mysql, mongodb, database, preview environment'
 47 |       }],
 48 |       navbar: {
 49 |         title: 'Replibyte',
 50 |         logo: {
 51 |           alt: 'Replibyte Logo',
 52 |           src: 'img/logo.svg',
 53 |         },
 54 |         items: [
 55 |           {
 56 |             type: 'doc',
 57 |             docId: 'introduction',
 58 |             position: 'left',
 59 |             label: 'Documentation',
 60 |           },
 61 |           {
 62 |             href: 'https://discord.qovery.com',
 63 |             label: 'Discord',
 64 |             position: 'left',
 65 |           },
 66 |           {
 67 |             href: 'https://www.qovery.com',
 68 |             label: 'Replibyte Cloud ⚡️',
 69 |             position: 'right',
 70 |           },
 71 |           {
 72 |             href: 'https://github.com/Qovery/replibyte',
 73 |             label: 'GitHub',
 74 |             position: 'right',
 75 |           },
 76 |         ],
 77 |       },
 78 |       footer: {
 79 |         //style: 'dark',
 80 |         links: [
 81 |           {
 82 |             title: 'Docs',
 83 |             items: [
 84 |               {
 85 |                 label: 'Tutorial',
 86 |                 to: '/docs/introduction',
 87 |               },
 88 |             ],
 89 |           },
 90 |           {
 91 |             title: 'Community',
 92 |             items: [
 93 |               {
 94 |                 label: 'Discord',
 95 |                 href: 'https://discord.qovery.com',
 96 |               },
 97 |               {
 98 |                 label: 'Twitter',
 99 |                 href: 'https://twitter.com/Qovery_',
100 |               },
101 |             ],
102 |           },
103 |           {
104 |             title: 'More',
105 |             items: [
106 |               {
107 |                 label: 'GitHub',
108 |                 href: 'https://github.com/Qovery/replibyte',
109 |               },
110 |               {
111 |                 label: 'Qovery',
112 |                 href: 'https://www.qovery.com',
113 |               },
114 |             ],
115 |           },
116 |         ],
117 |         copyright: `Copyright © ${new Date().getFullYear()} Replibyte by <a href="https://www.qovery.com">Qovery</a>`,
118 |       },
119 |       prism: {
120 |         theme: lightCodeTheme,
121 |         darkTheme: darkCodeTheme,
122 |         additionalLanguages: ['rust', 'yaml', 'bash']
123 |       },
124 |       colorMode: {
125 |         defaultMode: 'dark'
126 |       }
127 |     }),
128 | };
129 | 
130 | module.exports = config;
131 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/postgres.rs:
--------------------------------------------------------------------------------
  1 | use std::io::{Error, ErrorKind, Write};
  2 | use std::process::{Command, Stdio};
  3 | 
  4 | use crate::connector::Connector;
  5 | use crate::destination::Destination;
  6 | use crate::types::Bytes;
  7 | use crate::utils::{binary_exists, wait_for_command};
  8 | 
  9 | pub struct Postgres<'a> {
 10 |     host: &'a str,
 11 |     port: u16,
 12 |     database: &'a str,
 13 |     username: &'a str,
 14 |     password: &'a str,
 15 |     wipe_database: bool,
 16 | }
 17 | 
 18 | impl<'a> Postgres<'a> {
 19 |     pub fn new(
 20 |         host: &'a str,
 21 |         port: u16,
 22 |         database: &'a str,
 23 |         username: &'a str,
 24 |         password: &'a str,
 25 |         wipe_database: bool,
 26 |     ) -> Self {
 27 |         Postgres {
 28 |             host,
 29 |             port,
 30 |             database,
 31 |             username,
 32 |             password,
 33 |             wipe_database,
 34 |         }
 35 |     }
 36 | }
 37 | 
 38 | impl<'a> Connector for Postgres<'a> {
 39 |     fn init(&mut self) -> Result<(), Error> {
 40 |         let _ = binary_exists("psql")?;
 41 | 
 42 |         if self.wipe_database {
 43 |             let s_port = self.port.to_string();
 44 |             let wipe_db_query = wipe_database_query(self.username);
 45 | 
 46 |             let exit_status = Command::new("psql")
 47 |                 .env("PGPASSWORD", self.password)
 48 |                 .args([
 49 |                     "-h",
 50 |                     self.host,
 51 |                     "-p",
 52 |                     s_port.as_str(),
 53 |                     "-d",
 54 |                     self.database,
 55 |                     "-U",
 56 |                     self.username,
 57 |                     "-c",
 58 |                     wipe_db_query.as_str(),
 59 |                 ])
 60 |                 .stdout(Stdio::null())
 61 |                 .spawn()?
 62 |                 .wait()?;
 63 | 
 64 |             if !exit_status.success() {
 65 |                 return Err(Error::new(
 66 |                     ErrorKind::Other,
 67 |                     format!("command error: {:?}", exit_status.to_string()),
 68 |                 ));
 69 |             }
 70 |         }
 71 | 
 72 |         Ok(())
 73 |     }
 74 | }
 75 | 
 76 | impl<'a> Destination for Postgres<'a> {
 77 |     fn write(&self, data: Bytes) -> Result<(), Error> {
 78 |         let s_port = self.port.to_string();
 79 | 
 80 |         let mut process = Command::new("psql")
 81 |             .env("PGPASSWORD", self.password)
 82 |             .args([
 83 |                 "-h",
 84 |                 self.host,
 85 |                 "-p",
 86 |                 s_port.as_str(),
 87 |                 "-d",
 88 |                 self.database,
 89 |                 "-U",
 90 |                 self.username,
 91 |             ])
 92 |             .stdin(Stdio::piped())
 93 |             .stdout(Stdio::null())
 94 |             .spawn()?;
 95 | 
 96 |         let _ = process.stdin.take().unwrap().write_all(data.as_slice());
 97 | 
 98 |         wait_for_command(&mut process)
 99 |     }
100 | }
101 | 
102 | fn wipe_database_query(username: &str) -> String {
103 |     format!(
104 |         "\
105 |     DROP SCHEMA public CASCADE; \
106 |     CREATE SCHEMA public; \
107 |     GRANT ALL ON SCHEMA public TO \"{}\"; \
108 |     GRANT ALL ON SCHEMA public TO public;\
109 |     ",
110 |         username
111 |     )
112 | }
113 | 
114 | #[cfg(test)]
115 | mod tests {
116 |     use crate::connector::Connector;
117 |     use crate::destination::postgres::Postgres;
118 |     use crate::destination::Destination;
119 | 
120 |     fn get_postgres() -> Postgres<'static> {
121 |         Postgres::new("localhost", 5453, "root", "root", "password", true)
122 |     }
123 | 
124 |     fn get_invalid_postgres() -> Postgres<'static> {
125 |         Postgres::new("localhost", 5453, "root", "root", "wrongpassword", true)
126 |     }
127 | 
128 |     #[test]
129 |     fn connect() {
130 |         let mut p = get_postgres();
131 |         let _ = p.init().expect("can't init postgres");
132 |         assert!(p.write(b"SELECT 1".to_vec()).is_ok());
133 | 
134 |         let mut p = get_invalid_postgres();
135 |         assert!(p.init().is_err());
136 |         assert!(p.write(b"SELECT 1".to_vec()).is_err());
137 |     }
138 | 
139 |     #[test]
140 |     fn test_inserts() {}
141 | }
142 | 


--------------------------------------------------------------------------------
/website/docs/introduction.mdx:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Introduction
  3 | description: Replibyte is a tool to seed your development database with your production data while keeping sensitive data safe
  4 | hide_title: true
  5 | sidebar_position: 1
  6 | ---
  7 | 
  8 | import ThemedImage from '@theme/ThemedImage';
  9 | import useBaseUrl from "@docusaurus/useBaseUrl";
 10 | 
 11 | <ThemedImage
 12 |   alt="Replibyte logo"
 13 |   sources={{
 14 |     light: useBaseUrl('/img/full_logo_light.svg'),
 15 |     dark: useBaseUrl('/img/full_logo.svg'),
 16 |   }}
 17 | />
 18 | 
 19 | Replibyte is a blazingly fast tool to seed your databases with your production data while keeping sensitive data safe ⚡️
 20 | 
 21 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 22 | ![stability badge](https://img.shields.io/badge/stability-stable-green.svg?style=flat-square)
 23 | ![build and tests badge](https://github.com/Qovery/replibyte/actions/workflows/build-and-test.yml/badge.svg?style=flat-square)
 24 | [![discord server](https://img.shields.io/discord/688766934917185556?label=discord&style=flat-square)](https://discord.qovery.com)
 25 | 
 26 | ## Prerequisites
 27 | 
 28 | - MacOSX / Linux / Windows
 29 | - Nothing more! Replibyte is stateless and does not require anything special.
 30 | 
 31 | ## Usage
 32 | 
 33 | Create a dump
 34 | 
 35 | ```shell
 36 | replibyte -c conf.yaml dump create
 37 | ```
 38 | 
 39 | List all dumps
 40 | 
 41 | ```shell
 42 | replibyte -c conf.yaml dump list
 43 | 
 44 | type          name                  size    when                    compressed  encrypted
 45 | PostgreSQL    dump-1647706359405    154MB   Yesterday at 03:00 am   true        true
 46 | PostgreSQL    dump-1647731334517    152MB   2 days ago at 03:00 am  true        true
 47 | PostgreSQL    dump-1647734369306    149MB   3 days ago at 03:00 am  true        true
 48 | ```
 49 | 
 50 | Restore the latest dump in a local container
 51 | 
 52 | ```shell
 53 | replibyte -c conf.yaml dump restore local -v latest -i postgres -p 5432
 54 | ```
 55 | 
 56 | Restore the latest dump in a remote database
 57 | 
 58 | ```shell
 59 | replibyte -c conf.yaml dump restore remote -v latest
 60 | ```
 61 | 
 62 | ## Features
 63 | 
 64 | - [x] Support data backup and restore for PostgreSQL, MySQL and MongoDB
 65 | - [x] Replace sensitive data with fake data
 66 | - [x] Works on large database (> 10GB)
 67 | - [x] Database Subsetting: Scale down a production database to a more reasonable size 🔥
 68 | - [x] Start a local database with the prod data in a single command 🔥
 69 | - [x] On-the-fly data (de)compression (Zlib)
 70 | - [x] On-the-fly data de/encryption (AES-256)
 71 | - [x] Fully stateless (no server, no daemon) and lightweight binary 🍃
 72 | - [x] Use custom transformers
 73 | 
 74 | Here are the features we plan to support
 75 | 
 76 | - [ ] Auto-detect and version database schema change
 77 | - [ ] Auto-detect sensitive fields
 78 | - [ ] Auto-clean backed up data
 79 | 
 80 | ## Getting Started
 81 | 
 82 | * [How Replibyte works](/docs/how-replibyte-works)
 83 | * Initial setup:
 84 |   1. [Install](/docs/getting-started/installation)
 85 |   2. [Configure](/docs/getting-started/configuration)
 86 | * Step-by-step guides:
 87 |   1. [Create a dump](/docs/guides/create-a-dump)
 88 |   2. [Restore a dump](/docs/guides/restore-a-dump)
 89 |   3. [Subset a dump](/docs/guides/subset-a-dump)
 90 |   4. [Delete a dump](/docs/guides/delete-a-dump)
 91 |   5. Deploy Replibyte
 92 |       1. [Container](https://www.replibyte.com/docs/guides/deploy-replibyte/container)
 93 |       2. [Qovery](https://www.replibyte.com/docs/guides/deploy-replibyte/qovery)
 94 | 
 95 | ## Demo
 96 | 
 97 | [![What is RepliByte](../../assets/video_.png)](https://www.youtube.com/watch?v=IKeLnZvECQw)
 98 | 
 99 | ## Motivation
100 | 
101 | At [Qovery](https://www.qovery.com) (the company behind Replibyte), developers can clone their applications and databases just with one
102 | click. However, the cloning process can be tedious and time-consuming, and we end up copying the information multiple times. With RepliByte,
103 | the Qovery team wants to provide a comprehensive way to seed cloud databases from one place to another.
104 | 
105 | The long-term motivation behind RepliByte is to provide a way to clone any database in real-time. This project starts small, but has big
106 | ambition!
107 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/email.rs:
--------------------------------------------------------------------------------
  1 | use crate::transformer::Transformer;
  2 | use crate::types::Column;
  3 | use fake::faker::internet::raw::SafeEmail;
  4 | use fake::locales::EN;
  5 | use fake::Fake;
  6 | 
  7 | /// This struct is dedicated to replacing a string by an email address.
  8 | pub struct EmailTransformer {
  9 |     database_name: String,
 10 |     table_name: String,
 11 |     column_name: String,
 12 | }
 13 | 
 14 | impl EmailTransformer {
 15 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
 16 |     where
 17 |         S: Into<String>,
 18 |     {
 19 |         EmailTransformer {
 20 |             database_name: database_name.into(),
 21 |             table_name: table_name.into(),
 22 |             column_name: column_name.into(),
 23 |         }
 24 |     }
 25 | }
 26 | 
 27 | impl Default for EmailTransformer {
 28 |     fn default() -> Self {
 29 |         EmailTransformer {
 30 |             database_name: String::default(),
 31 |             table_name: String::default(),
 32 |             column_name: String::default(),
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | impl Transformer for EmailTransformer {
 38 |     fn id(&self) -> &str {
 39 |         "email"
 40 |     }
 41 | 
 42 |     fn description(&self) -> &str {
 43 |         "Generate an email address (string only). [john.doe@company.com]->[tony.stark@avengers.com]"
 44 |     }
 45 | 
 46 |     fn database_name(&self) -> &str {
 47 |         self.database_name.as_str()
 48 |     }
 49 | 
 50 |     fn table_name(&self) -> &str {
 51 |         self.table_name.as_str()
 52 |     }
 53 | 
 54 |     fn column_name(&self) -> &str {
 55 |         self.column_name.as_str()
 56 |     }
 57 | 
 58 |     fn transform(&self, column: Column) -> Column {
 59 |         match column {
 60 |             Column::StringValue(column_name, value) => {
 61 |                 let new_value = match value.len() {
 62 |                     len if len == 0 => value,
 63 |                     _ => SafeEmail(EN).fake(),
 64 |                 };
 65 | 
 66 |                 Column::StringValue(column_name, new_value)
 67 |             }
 68 |             column => column,
 69 |         }
 70 |     }
 71 | }
 72 | 
 73 | #[cfg(test)]
 74 | mod tests {
 75 |     use crate::{transformer::Transformer, types::Column};
 76 | 
 77 |     use super::EmailTransformer;
 78 | 
 79 |     #[test]
 80 |     fn transform_email_with_number_value() {
 81 |         let expected_value = 34;
 82 |         let transformer = get_transformer();
 83 |         let column = Column::NumberValue("email".to_string(), expected_value);
 84 |         let transformed_column = transformer.transform(column);
 85 |         let transformed_value = transformed_column.number_value().unwrap();
 86 | 
 87 |         assert_eq!(transformed_value.to_owned(), expected_value)
 88 |     }
 89 | 
 90 |     #[test]
 91 |     fn transform_email_with_float_value() {
 92 |         let expected_value = 1.5;
 93 |         let transformer = get_transformer();
 94 |         let column = Column::FloatNumberValue("email".to_string(), expected_value);
 95 |         let transformed_column = transformer.transform(column);
 96 |         let transformed_value = transformed_column.float_number_value().unwrap();
 97 | 
 98 |         assert_eq!(transformed_value.to_owned(), expected_value)
 99 |     }
100 | 
101 |     #[test]
102 |     fn transform_email_with_empty_string_value() {
103 |         let expected_value = "";
104 |         let transformer = get_transformer();
105 |         let column = Column::StringValue("email".to_string(), expected_value.to_string());
106 |         let transformed_column = transformer.transform(column);
107 |         let transformed_value = transformed_column.string_value().unwrap();
108 | 
109 |         assert_eq!(transformed_value, expected_value)
110 |     }
111 | 
112 |     #[test]
113 |     fn transform_email_with_string_value() {
114 |         let transformer = get_transformer();
115 |         let column = Column::StringValue("email".to_string(), "john.doe@company.com".to_string());
116 |         let transformed_column = transformer.transform(column);
117 |         let transformed_value = transformed_column.string_value().unwrap();
118 | 
119 |         assert!(!transformed_value.is_empty());
120 |         assert_ne!(transformed_value, "john.doe@company.com".to_string());
121 |     }
122 | 
123 |     fn get_transformer() -> EmailTransformer {
124 |         EmailTransformer::new("github", "users", "email")
125 |     }
126 | }
127 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center"> <img src="assets/RepliByte%20Logo.png" alt="replibyte logo"/> </p>
  2 | 
  3 | <h3 align="center">Seed Your Development Database With Real Data ⚡️</h3>
  4 | <p align="center">Replibyte is a blazingly fast tool to seed your databases with your production data while keeping sensitive data safe 🔥</p>
  5 | 
  6 | <p align="center">
  7 | <a href="https://opensource.org/licenses/MIT"> <img alt="MIT License" src="https://img.shields.io/badge/License-MIT-yellow.svg"> </a>
  8 | <img src="https://img.shields.io/badge/stability-stable-green.svg?style=flat-square" alt="stable badge">
  9 | <img src="https://img.shields.io/badge/stability-stable-green.svg?style=flat-square" alt="stable badge">
 10 | <img src="https://github.com/Qovery/replibyte/actions/workflows/build-and-test.yml/badge.svg?style=flat-square" alt="Build and Tests">
 11 | <a href="https://discord.qovery.com"> <img alt="Discord" src="https://img.shields.io/discord/688766934917185556?label=discord&style=flat-square"> </a>
 12 | </p>
 13 | 
 14 | <p align="center">
 15 | <a href="https://runacap.com/ross-index/q3-2022/" target="_blank" rel="noopener">
 16 | <img style="width: 260px; height: 56px" src="https://runacap.com/wp-content/uploads/2022/10/ROSS_badge_black_Q3_2022.svg" alt="ROSS Index - Fastest Growing Open-Source Startups in Q3 2022 | Runa Capital" width="260" height="56" />
 17 | </a>
 18 | 
 19 | </p>
 20 | 
 21 | ## Prerequisites
 22 | 
 23 | - MacOSX / Linux / Windows
 24 | - Nothing more! Replibyte is stateless and does not require anything special.
 25 | 
 26 | ## Usage
 27 | 
 28 | Create a dump
 29 | 
 30 | ```shell
 31 | replibyte -c conf.yaml dump create
 32 | ```
 33 | 
 34 | List all dumps
 35 | 
 36 | ```shell
 37 | replibyte -c conf.yaml dump list
 38 | 
 39 | type          name                  size    when                    compressed  encrypted
 40 | PostgreSQL    dump-1647706359405    154MB   Yesterday at 03:00 am   true        true
 41 | PostgreSQL    dump-1647731334517    152MB   2 days ago at 03:00 am  true        true
 42 | PostgreSQL    dump-1647734369306    149MB   3 days ago at 03:00 am  true        true
 43 | ```
 44 | 
 45 | Restore the latest dump in a local container
 46 | 
 47 | ```shell
 48 | replibyte -c conf.yaml dump restore local -v latest -i postgres -p 5432
 49 | ```
 50 | 
 51 | Restore the latest dump in a remote database
 52 | 
 53 | ```shell
 54 | replibyte -c conf.yaml dump restore remote -v latest
 55 | ```
 56 | 
 57 | ## Features
 58 | 
 59 | - [x] Support data dump and restore for PostgreSQL, MySQL and MongoDB
 60 | - [x] Analyze your data schema 🔎
 61 | - [x] Replace sensitive data with fake data
 62 | - [x] Works on large database (> 10GB)
 63 | - [x] Database Subsetting: Scale down a production database to a more reasonable size 🔥
 64 | - [x] Start a local database with the production data in a single command 🔥
 65 | - [x] On-the-fly data (de)compression (Zlib)
 66 | - [x] On-the-fly data de/encryption (AES-256)
 67 | - [x] Fully stateless (no server, no daemon) and lightweight binary 🍃
 68 | - [x] Use [custom transformers](examples/wasm)
 69 | 
 70 | Here are the features we plan to support
 71 | 
 72 | - [ ] Auto-detect and version database schema change
 73 | - [ ] Auto-detect sensitive fields
 74 | - [ ] Auto-clean backed up data
 75 | 
 76 | ## Getting Started
 77 | 
 78 | 1. [How Replibyte works](https://www.replibyte.com/docs/how-replibyte-works)
 79 | 2. Initial setup: 
 80 |    1. [Install](https://www.replibyte.com/docs/getting-started/installation)
 81 |    2. [Configure](https://www.replibyte.com/docs/getting-started/configuration)
 82 | 3. Step-by-step guides:
 83 |    1. [Create a dump](https://www.replibyte.com/docs/guides/create-a-dump)
 84 |    2. [Restore a dump](https://www.replibyte.com/docs/guides/restore-a-dump)
 85 |    3. [Subset a dump](https://www.replibyte.com/docs/guides/subset-a-dump)
 86 |    4. [Delete a dump](https://www.replibyte.com/docs/guides/delete-a-dump)
 87 |    5. Deploy Replibyte
 88 |       1. [Container](https://www.replibyte.com/docs/guides/deploy-replibyte/container)
 89 |       2. [Qovery](https://www.replibyte.com/docs/guides/deploy-replibyte/qovery)
 90 | 
 91 | ## Demo
 92 | 
 93 | [![What is RepliByte](assets/video_.png)](https://www.youtube.com/watch?v=IKeLnZvECQw)
 94 | 
 95 | ## Contributing
 96 | 
 97 | Check [here](https://www.replibyte.com/docs/contributing).
 98 | 
 99 | ## Thanks
100 | 
101 | Thanks to all people sharing their ideas to make Replibyte better. We do appreciate it. I would also thank [AirByte](https://airbyte.com/),
102 | a great product and a trustworthy source of inspiration for this project.
103 | 
104 | ---
105 | 
106 | Replibyte is initiated and maintained by [Qovery](https://www.qovery.com?ref=replibyte-readme). 
107 | 


--------------------------------------------------------------------------------
/replibyte/src/cli.rs:
--------------------------------------------------------------------------------
  1 | use std::path::PathBuf;
  2 | 
  3 | use clap::{Args, Parser, Subcommand};
  4 | 
  5 | /// Replibyte is a tool to seed your databases with your production data while keeping sensitive data safe, just pass `-h`
  6 | #[derive(Parser, Debug)]
  7 | #[clap(version, about, long_about = None)]
  8 | #[clap(propagate_version = true)]
  9 | pub struct CLI {
 10 |     /// Replibyte configuration file
 11 |     #[clap(short, long, parse(from_os_str), value_name = "configuration file")]
 12 |     pub config: PathBuf,
 13 |     #[clap(subcommand)]
 14 |     pub sub_commands: SubCommand,
 15 |     /// disable telemetry
 16 |     #[clap(short, long)]
 17 |     pub no_telemetry: bool,
 18 | }
 19 | 
 20 | /// sub commands
 21 | #[derive(Subcommand, Debug)]
 22 | pub enum SubCommand {
 23 |     /// all dump commands
 24 |     #[clap(subcommand)]
 25 |     Dump(DumpCommand),
 26 |     /// all source commands
 27 |     #[clap(subcommand)]
 28 |     Source(SourceCommand),
 29 |     /// all transformer commands
 30 |     #[clap(subcommand)]
 31 |     Transformer(TransformerCommand),
 32 | }
 33 | 
 34 | /// all dump commands
 35 | #[derive(Subcommand, Debug)]
 36 | pub enum DumpCommand {
 37 |     /// list available dumps
 38 |     List,
 39 |     /// launch dump -- use `-h` to show all the options
 40 |     Create(DumpCreateArgs),
 41 |     /// all restore commands
 42 |     #[clap(subcommand)]
 43 |     Restore(RestoreCommand),
 44 |     /// delete a dump from the defined datastore
 45 |     Delete(DumpDeleteArgs),
 46 | }
 47 | 
 48 | /// all transformer commands
 49 | #[derive(Subcommand, Debug)]
 50 | pub enum TransformerCommand {
 51 |     /// list available transformers
 52 |     List,
 53 | }
 54 | 
 55 | /// all restore commands
 56 | #[derive(Subcommand, Debug)]
 57 | pub enum RestoreCommand {
 58 |     /// Restore dump inside a local Docker container
 59 |     Local(RestoreLocalArgs),
 60 |     /// Restore dump inside the configured destination
 61 |     Remote(RestoreArgs),
 62 | }
 63 | 
 64 | /// all restore commands
 65 | #[derive(Args, Debug)]
 66 | pub struct RestoreArgs {
 67 |     /// restore dump -- set `latest` or `<dump name>` - use `dump list` command to list all dumps available
 68 |     #[clap(short, long, value_name = "[latest | dump name]")]
 69 |     pub value: String,
 70 |     /// stream output on stdout
 71 |     #[clap(short, long)]
 72 |     pub output: bool,
 73 | }
 74 | 
 75 | /// restore dump in a local Docker container
 76 | #[derive(Args, Debug)]
 77 | pub struct RestoreLocalArgs {
 78 |     /// restore dump -- set `latest` or `<dump name>` - use `dump list` command to list all dumps available
 79 |     #[clap(short, long, value_name = "[latest | dump name]")]
 80 |     pub value: String,
 81 |     /// stream output on stdout
 82 |     #[clap(short, long)]
 83 |     pub output: bool,
 84 |     /// Docker image tag for the container to spawn
 85 |     #[clap(short, long)]
 86 |     pub tag: Option<String>,
 87 |     /// Docker container port to map on the host
 88 |     #[clap(short, long)]
 89 |     pub port: Option<u16>,
 90 |     /// Remove the Docker container on Ctrl-c
 91 |     #[clap(short, long)]
 92 |     pub remove: bool,
 93 |     /// Docker image type
 94 |     #[clap(short, long, value_name = "[postgresql | mysql | mongodb]")]
 95 |     pub image: Option<String>,
 96 | }
 97 | 
 98 | /// all dump run commands
 99 | #[derive(Args, Debug)]
100 | pub struct DumpCreateArgs {
101 |     #[clap(name = "source_type", short, long, value_name = "[postgresql | mysql | mongodb]", possible_values = &["postgresql", "mysql", "mongodb"], requires = "input")]
102 |     /// database source type to import
103 |     pub source_type: Option<String>,
104 |     /// import dump from stdin
105 |     #[clap(name = "input", short, long, requires = "source_type")]
106 |     pub input: bool,
107 |     #[clap(short, long, parse(from_os_str), value_name = "dump file")]
108 |     /// dump file
109 |     pub file: Option<PathBuf>,
110 |     /// dump name
111 |     #[clap(short, long)]
112 |     pub name: Option<String>,
113 | }
114 | 
115 | #[derive(Args, Debug)]
116 | #[clap(group = clap::ArgGroup::new("delete-mode").multiple(false))]
117 | pub struct DumpDeleteArgs {
118 |     /// Name of the dump to delete
119 |     #[clap(group = "delete-mode")]
120 |     pub dump: Option<String>,
121 |     /// Remove all dumps older than the specified number of days. Example: `14d` for deleting dumps older than 14 days
122 |     #[clap(long, group = "delete-mode")]
123 |     pub older_than: Option<String>,
124 |     /// Keep only the last N dumps
125 |     #[clap(long, group = "delete-mode")]
126 |     pub keep_last: Option<usize>,
127 | }
128 | 
129 | /// all source commands
130 | #[derive(Subcommand, Debug)]
131 | pub enum SourceCommand {
132 |     /// Show the database schema. When used with MongoDB, the schema will be probabilistic and returned as a JSON document
133 |     Schema,
134 | }
135 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/docker.rs:
--------------------------------------------------------------------------------
  1 | use std::io::{Error, ErrorKind};
  2 | use std::process::{Child, Command, Stdio};
  3 | use std::thread;
  4 | use std::time::Duration;
  5 | 
  6 | pub const DOCKER_BINARY_NAME: &str = "docker";
  7 | 
  8 | pub struct Image {
  9 |     pub name: String,
 10 |     pub tag: String,
 11 | }
 12 | 
 13 | pub struct ContainerOptions {
 14 |     pub host_port: u16,
 15 |     pub container_port: u16,
 16 | }
 17 | 
 18 | pub struct Container {
 19 |     pub id: String,
 20 | }
 21 | 
 22 | impl Container {
 23 |     pub fn new(
 24 |         image: &Image,
 25 |         options: &ContainerOptions,
 26 |         args: Vec<&str>,
 27 |         command: Option<Vec<&str>>,
 28 |     ) -> Result<Container, Error> {
 29 |         let port_mapping = format!("{}:{}", options.host_port, options.container_port);
 30 |         let image_version = format!("{}:{}", image.name, image.tag);
 31 |         let mut run_args = vec!["run", "-p", port_mapping.as_str()];
 32 | 
 33 |         for arg in args {
 34 |             run_args.push(arg);
 35 |         }
 36 | 
 37 |         run_args.push("-d");
 38 |         run_args.push(image_version.as_str());
 39 | 
 40 |         if let Some(command) = command {
 41 |             for arg in command {
 42 |                 run_args.push(arg);
 43 |             }
 44 |         }
 45 | 
 46 |         let output = Command::new(DOCKER_BINARY_NAME).args(run_args).output()?;
 47 | 
 48 |         // FIX: this is a workaround to wait until the container is up
 49 |         thread::sleep(Duration::from_millis(20_000));
 50 | 
 51 |         match output.status.success() {
 52 |             true => match String::from_utf8(output.stdout) {
 53 |                 Ok(container_id) => Ok(Container { id: container_id }),
 54 |                 Err(err) => Err(Error::new(ErrorKind::Other, format!("{}", err))),
 55 |             },
 56 |             false => match String::from_utf8(output.stderr) {
 57 |                 Ok(stderr) => Err(Error::new(ErrorKind::Other, format!("{}", stderr))),
 58 |                 Err(err) => Err(Error::new(ErrorKind::Other, format!("{}", err))),
 59 |             },
 60 |         }
 61 |     }
 62 | 
 63 |     pub fn stop(&self) -> Result<(), Error> {
 64 |         let _process = Command::new(DOCKER_BINARY_NAME)
 65 |             .args(["stop", &self.id[..12]])
 66 |             .stdout(Stdio::null())
 67 |             .spawn()?;
 68 | 
 69 |         Ok(())
 70 |     }
 71 | 
 72 |     pub fn rm(&self) -> Result<(), Error> {
 73 |         let _process = Command::new(DOCKER_BINARY_NAME)
 74 |             .args(["rm", "-f", &self.id[..12]])
 75 |             .stdout(Stdio::null())
 76 |             .spawn()?;
 77 | 
 78 |         // TODO: should I drop the struct?
 79 |         drop(&self);
 80 | 
 81 |         Ok(())
 82 |     }
 83 | 
 84 |     pub fn exec(&self, cmd: &str) -> Result<Child, Error> {
 85 |         Command::new(DOCKER_BINARY_NAME)
 86 |             .args(["exec", "-i", &self.id[..12], "/bin/bash", "-c", cmd])
 87 |             .stdin(Stdio::piped())
 88 |             .stdout(Stdio::piped())
 89 |             .spawn()
 90 |     }
 91 | }
 92 | 
 93 | /// checks if the `dockerd` daemon runs
 94 | pub fn daemon_is_running() -> Result<(), Error> {
 95 |     let mut process = Command::new(DOCKER_BINARY_NAME)
 96 |         .args(["ps"])
 97 |         .stdout(Stdio::null())
 98 |         .spawn()?;
 99 | 
100 |     match process.wait() {
101 |         Ok(exit_status) => {
102 |             if exit_status.success() {
103 |                 Ok(())
104 |             } else {
105 |                 Err(Error::new(
106 |                     ErrorKind::Other,
107 |                     format!(
108 |                         "cannot connect to the Docker daemon: exit_status {}",
109 |                         exit_status.to_string()
110 |                     ),
111 |                 ))
112 |             }
113 |         }
114 |         Err(err) => Err(Error::new(
115 |             ErrorKind::Other,
116 |             format!("cannot connect to the Docker daemon: {}", err),
117 |         )),
118 |     }
119 | }
120 | 
121 | #[cfg(test)]
122 | mod tests {
123 |     use super::{Container, ContainerOptions, Image};
124 | 
125 |     #[test]
126 |     fn handle_containers() {
127 |         let image = Image {
128 |             name: "postgres".to_string(),
129 |             tag: "13".to_string(),
130 |         };
131 | 
132 |         let options = ContainerOptions {
133 |             host_port: 5433,
134 |             container_port: 5432,
135 |         };
136 | 
137 |         let args = vec![
138 |             "-e",
139 |             "POSTGRES_PASSWORD=password",
140 |             "-e",
141 |             "POSTGRES_USER=root",
142 |         ];
143 | 
144 |         let container = Container::new(&image, &options, args, None).unwrap();
145 | 
146 |         assert!(container.id != "".to_string());
147 |         assert!(container.stop().is_ok());
148 |         assert!(container.rm().is_ok());
149 |     }
150 | }
151 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/first_name.rs:
--------------------------------------------------------------------------------
  1 | use crate::transformer::Transformer;
  2 | use crate::types::Column;
  3 | use fake::faker::name::raw::FirstName;
  4 | use fake::locales::EN;
  5 | use fake::Fake;
  6 | 
  7 | /// This struct is dedicated to replacing string by a first name.
  8 | pub struct FirstNameTransformer {
  9 |     database_name: String,
 10 |     table_name: String,
 11 |     column_name: String,
 12 | }
 13 | 
 14 | impl FirstNameTransformer {
 15 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
 16 |     where
 17 |         S: Into<String>,
 18 |     {
 19 |         FirstNameTransformer {
 20 |             database_name: database_name.into(),
 21 |             table_name: table_name.into(),
 22 |             column_name: column_name.into(),
 23 |         }
 24 |     }
 25 | }
 26 | 
 27 | impl Default for FirstNameTransformer {
 28 |     fn default() -> Self {
 29 |         FirstNameTransformer {
 30 |             database_name: String::default(),
 31 |             table_name: String::default(),
 32 |             column_name: String::default(),
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | impl Transformer for FirstNameTransformer {
 38 |     fn id(&self) -> &str {
 39 |         "first-name"
 40 |     }
 41 | 
 42 |     fn description(&self) -> &str {
 43 |         "Generate a first name (string only). [Lucas]->[Georges]"
 44 |     }
 45 | 
 46 |     fn database_name(&self) -> &str {
 47 |         self.database_name.as_str()
 48 |     }
 49 | 
 50 |     fn table_name(&self) -> &str {
 51 |         self.table_name.as_str()
 52 |     }
 53 | 
 54 |     fn column_name(&self) -> &str {
 55 |         self.column_name.as_str()
 56 |     }
 57 | 
 58 |     fn transform(&self, column: Column) -> Column {
 59 |         match column {
 60 |             Column::NumberValue(column_name, value) => Column::NumberValue(column_name, value),
 61 |             Column::FloatNumberValue(column_name, value) => {
 62 |                 Column::FloatNumberValue(column_name, value)
 63 |             }
 64 |             Column::StringValue(column_name, value) => {
 65 |                 let new_value = if value == "" {
 66 |                     "".to_string()
 67 |                 } else {
 68 |                     FirstName(EN).fake()
 69 |                 };
 70 | 
 71 |                 Column::StringValue(column_name, new_value)
 72 |             }
 73 |             Column::CharValue(column_name, value) => Column::CharValue(column_name, value),
 74 |             Column::BooleanValue(column_name, value) => Column::BooleanValue(column_name, value),
 75 |             Column::None(column_name) => Column::None(column_name),
 76 |         }
 77 |     }
 78 | }
 79 | 
 80 | #[cfg(test)]
 81 | mod tests {
 82 |     use crate::{transformer::Transformer, types::Column};
 83 | 
 84 |     use super::FirstNameTransformer;
 85 | 
 86 |     #[test]
 87 |     fn transform_first_name_with_number_value() {
 88 |         let expected_value = 34;
 89 |         let transformer = get_transformer();
 90 |         let column = Column::NumberValue("first_name".to_string(), expected_value);
 91 |         let transformed_column = transformer.transform(column);
 92 |         let transformed_value = transformed_column.number_value().unwrap();
 93 | 
 94 |         assert_eq!(transformed_value.to_owned(), expected_value)
 95 |     }
 96 | 
 97 |     #[test]
 98 |     fn transform_first_name_with_float_value() {
 99 |         let expected_value = 1.5;
100 |         let transformer = get_transformer();
101 |         let column = Column::FloatNumberValue("first_name".to_string(), expected_value);
102 |         let transformed_column = transformer.transform(column);
103 |         let transformed_value = transformed_column.float_number_value().unwrap();
104 | 
105 |         assert_eq!(transformed_value.to_owned(), expected_value)
106 |     }
107 | 
108 |     #[test]
109 |     fn transform_first_name_with_empty_string_value() {
110 |         let expected_value = "";
111 |         let transformer = get_transformer();
112 |         let column = Column::StringValue("first_name".to_string(), expected_value.to_string());
113 |         let transformed_column = transformer.transform(column);
114 |         let transformed_value = transformed_column.string_value().unwrap();
115 | 
116 |         assert_eq!(transformed_value, expected_value)
117 |     }
118 | 
119 |     #[test]
120 |     fn transform_first_name_with_string_value() {
121 |         let transformer = get_transformer();
122 |         let column = Column::StringValue("first_name".to_string(), "Lucas".to_string());
123 |         let transformed_column = transformer.transform(column);
124 |         let transformed_value = transformed_column.string_value().unwrap();
125 | 
126 |         assert!(!transformed_value.is_empty());
127 |         assert_ne!(transformed_value, "Lucas".to_string());
128 |     }
129 | 
130 |     fn get_transformer() -> FirstNameTransformer {
131 |         FirstNameTransformer::new("github", "users", "first_name")
132 |     }
133 | }
134 | 


--------------------------------------------------------------------------------
/website/docs/datastores.mdx:
--------------------------------------------------------------------------------
  1 | ---
  2 | sidebar_position: 6
  3 | ---
  4 | 
  5 | # Datastores
  6 | 
  7 | Replibyte supports multiple datastores like AWS S3, GCP Cloud Storage (any other S3 compatible service), and your local disk.
  8 | 
  9 | ## AWS S3
 10 | 
 11 | You can use the default ways of configuring AWS credentials, but if you need to specify customized credentials or a profile
 12 | to use you can do so via the datastore configuration.
 13 | 
 14 | ### Generate API Keys
 15 | 
 16 | To generate your API keys:
 17 | 
 18 | ![](/img/datastore/aws/01.png)
 19 | 
 20 | 1. Sign in into your AWS console
 21 | 2. Click on **IAM** or search for **IAM**
 22 | 3. Select the **Users** page.
 23 | 4. Click on **Add users**.
 24 | 
 25 | 5. Create a user with "**Access Key - Programmatic Access**"
 26 | ![](/img/datastore/aws/02.png)
 27 | 
 28 | :::caution
 29 | 
 30 | Restrict access as much as possible. Replibyte just need write/read access within a dedicated bucket.
 31 | 
 32 | :::
 33 | 
 34 | 6. Set the permissions you need for this user.
 35 | ![](/img/datastore/aws/03.png)
 36 | 
 37 | Here's an example IAM Policy for a user to be able to read/write to a specific bucket.
 38 | ```json
 39 | {
 40 |     "Version": "2012-10-17",
 41 |     "Statement": [
 42 |         {
 43 |             "Sid": "VisualEditor0",
 44 |             "Effect": "Allow",
 45 |             "Action": [
 46 |                 "s3:Get*",
 47 |                 "s3:List*",
 48 |                 "s3:Put*"
 49 |             ],
 50 |             "Resource": [
 51 |                 "arn:aws:s3:::your-bucket-name-here",
 52 |                 "arn:aws:s3:::your-bucket-name-here/*"
 53 |             ]
 54 |         }
 55 |     ]
 56 | }
 57 | ```
 58 | 
 59 | 7. Copy the **Access key** and the **Secret**
 60 | 
 61 | ### Replibyte configuration
 62 | 
 63 | Here is the datastore configuration to use:
 64 | 
 65 | ```yaml
 66 | ...
 67 | datastore:
 68 |   aws:
 69 |     bucket: <your_bucket>
 70 |     profile: <your_profile> # optional
 71 |     region: <your_region> # optional
 72 |     credentials: # optional
 73 |       access_key_id: XXX
 74 |       secret_access_key: XXX
 75 |       session_token: XXX # optional
 76 | ...
 77 | ```
 78 | 
 79 | You can omit the optional properties in which case the default configuration mechanisms will be used, like with the AWS CLI.
 80 | 
 81 | ## GCP Cloud Storage
 82 | 
 83 | ### Generate API Keys
 84 | 
 85 | To generate your API keys compatible with the S3 protocol:
 86 | 
 87 | ![Screenshot to configure GCP cloud storage](/img/datastore/gcp/01.jpg)
 88 | 
 89 | 1. Sign in into your GCP console
 90 | 2. Click on the Cloud Storage
 91 | 3. Click on the **INTEROPERABILITY** tab
 92 | 4. Click on **CREATE A KEY**
 93 | 5. Copy the **Access key** and the **Secret**
 94 | 
 95 | ### Create bucket
 96 | 
 97 | Replibyte does not create the bucket automatically for GCP Cloud Storage. You'll need to create it manually.
 98 | 
 99 | :::caution
100 | 
101 | Do not turn on object "versioning".
102 | 
103 | :::
104 | 
105 | ### Replibyte configuration
106 | 
107 | Here is the datastore configuration to use:
108 | 
109 | ```yaml
110 | ...
111 | datastore:
112 |   gcp:
113 |     bucket: your-bucket-name
114 |     region: us-central1
115 |     access_key: $GS_ACCESS_KEY
116 |     secret: $GS_SECRET
117 | ...
118 | ```
119 | 
120 | You can find the GCP Cloud Storage bucket locations [here](https://cloud.google.com/storage/docs/locations).
121 | 
122 | ## Other S3 compatible
123 | 
124 | Refer to [AWS S3](#aws-s3) for the default S3 wire compatible protocol and the custom endpoint parameter:
125 | 
126 | ```yaml
127 | ...
128 | datastore:
129 |   aws:
130 |     bucket: <your_bucket>
131 |     region: <your_region>
132 |     credentials:
133 |       access_key_id: XXX
134 |       secret_access_key: XXX
135 |     endpoint:
136 |       custom: 'https://your-s3-compatible-endpoint'
137 | ...
138 | ```
139 | 
140 | `access_key_id` and `secret_access_key` must be valid hash-based message authentication code (HMAC) keys. Refer to the service to use to get those keys.
141 | 
142 | ## Local disk
143 | 
144 | ### Create a directory
145 | 
146 | Replibyte does not create the directory automatically for you. You'll need to create it manually.
147 | 
148 | For example, you can create a directory by running:
149 | 
150 | ```sh
151 | mkdir /data/replibyte
152 | ```
153 | 
154 | ### Replibyte configuration
155 | 
156 | Here is the datastore configuration to use:
157 | 
158 | ```yaml
159 | ...
160 | datastore:
161 |   local_disk:
162 |     dir: <your_directory_path>
163 | ...
164 | ```
165 | 
166 | `dir` must be a readable and writable directory to the user running `replibyte`.
167 | 
168 | So, to use our previously created `/data/replibyte` directory, the datastore config must be:
169 | 
170 | ```yaml
171 | ...
172 | datastore:
173 |   local_disk:
174 |     dir: /data/replibyte
175 | ...
176 | ```
177 | 
178 | ## Add another datastore
179 | 
180 | Do you need another datastore? Replibyte is extensible and any datastore can be supported. You are free to contribute by opening an issue or/and a pull request.
181 | 
182 | To contribute, please see the [contributing](/docs/contributing) page.
183 | 


--------------------------------------------------------------------------------
/replibyte/src/destination/mongodb_docker.rs:
--------------------------------------------------------------------------------
  1 | use crate::connector::Connector;
  2 | use crate::destination::docker::{
  3 |     daemon_is_running, Container, ContainerOptions, Image, DOCKER_BINARY_NAME,
  4 | };
  5 | use crate::destination::Destination;
  6 | use crate::types::Bytes;
  7 | use crate::utils::binary_exists;
  8 | use std::io::{Error, ErrorKind, Write};
  9 | 
 10 | const DEFAULT_MONGO_IMAGE: &str = "mongo";
 11 | pub const DEFAULT_MONGO_IMAGE_TAG: &str = "5";
 12 | pub const DEFAULT_MONGO_CONTAINER_PORT: u16 = 27017;
 13 | const DEFAULT_MONGO_USER: &str = "root";
 14 | const DEFAULT_MONGO_PASSWORD: &str = "password";
 15 | 
 16 | pub struct MongoDBDocker {
 17 |     pub image: Image,
 18 |     pub options: ContainerOptions,
 19 |     pub container: Option<Container>,
 20 | }
 21 | 
 22 | impl MongoDBDocker {
 23 |     pub fn new(tag: String, port: u16) -> Self {
 24 |         Self {
 25 |             image: Image {
 26 |                 name: DEFAULT_MONGO_IMAGE.to_string(),
 27 |                 tag,
 28 |             },
 29 |             options: ContainerOptions {
 30 |                 host_port: port,
 31 |                 container_port: DEFAULT_MONGO_CONTAINER_PORT,
 32 |             },
 33 |             container: None,
 34 |         }
 35 |     }
 36 | }
 37 | 
 38 | impl Connector for MongoDBDocker {
 39 |     fn init(&mut self) -> Result<(), Error> {
 40 |         let _ = binary_exists(DOCKER_BINARY_NAME)?;
 41 |         let _ = daemon_is_running()?;
 42 | 
 43 |         let password_env = format!("MONGO_INITDB_ROOT_USERNAME={}", DEFAULT_MONGO_USER);
 44 |         let user_env = format!("MONGO_INITDB_ROOT_PASSWORD={}", DEFAULT_MONGO_PASSWORD);
 45 |         let container = Container::new(
 46 |             &self.image,
 47 |             &self.options,
 48 |             vec!["-e", password_env.as_str(), "-e", user_env.as_str()],
 49 |             None,
 50 |         )?;
 51 | 
 52 |         self.container = Some(container);
 53 |         Ok(())
 54 |     }
 55 | }
 56 | 
 57 | impl Destination for MongoDBDocker {
 58 |     fn write(&self, data: Bytes) -> Result<(), Error> {
 59 |         let cmd = format!(
 60 |             "mongorestore --authenticationDatabase admin -u {} -p {} --archive",
 61 |             DEFAULT_MONGO_USER, DEFAULT_MONGO_PASSWORD,
 62 |         );
 63 | 
 64 |         match &self.container {
 65 |             Some(container) => {
 66 |                 let mut container_exec = container.exec(&cmd)?;
 67 |                 let _ = container_exec
 68 |                     .stdin
 69 |                     .take()
 70 |                     .unwrap()
 71 |                     .write_all(&data[..data.len() - 1]); // remove trailing null terminator, or else mongorestore will fail
 72 | 
 73 |                 let exit_status = container_exec.wait()?;
 74 |                 if !exit_status.success() {
 75 |                     return Err(Error::new(
 76 |                         ErrorKind::Other,
 77 |                         format!("command error: {:?}", exit_status.to_string()),
 78 |                     ));
 79 |                 }
 80 | 
 81 |                 Ok(())
 82 |             }
 83 |             None => Err(Error::new(
 84 |                 ErrorKind::Other,
 85 |                 "command error: cannot retrieve container",
 86 |             )),
 87 |         }
 88 |     }
 89 | }
 90 | 
 91 | #[cfg(test)]
 92 | mod tests {
 93 |     use dump_parser::utils::decode_hex;
 94 | 
 95 |     use crate::connector::Connector;
 96 |     use crate::destination::mongodb_docker::MongoDBDocker;
 97 |     use crate::destination::Destination;
 98 | 
 99 |     fn get_mongodb() -> MongoDBDocker {
100 |         MongoDBDocker::new("5".to_string(), 27021)
101 |     }
102 | 
103 |     fn get_invalid_mongodb() -> MongoDBDocker {
104 |         MongoDBDocker::new("bad_tag".to_string(), 27021)
105 |     }
106 | 
107 |     #[test]
108 |     fn connect() {
109 |         let mut p = get_mongodb();
110 |         let _ = p.init().expect("can't init mongodb");
111 |         let bytes = decode_hex("6de299816600000010636f6e63757272656e745f636f6c6c656374696f6e7300040000000276657273696f6e0004000000302e3100027365727665725f76657273696f6e0006000000352e302e360002746f6f6c5f76657273696f6e00080000003130302e352e320000020100000264620005000000746573740002636f6c6c656374696f6e0006000000757365727300026d6574616461746100ad0000007b22696e6465786573223a5b7b2276223a7b22246e756d626572496e74223a2232227d2c226b6579223a7b225f6964223a7b22246e756d626572496e74223a2231227d7d2c226e616d65223a225f69645f227d5d2c2275756964223a223464363734323637316333613463663938316439386164373831343735333234222c22636f6c6c656374696f6e4e616d65223a227573657273222c2274797065223a22636f6c6c656374696f6e227d001073697a6500000000000274797065000b000000636f6c6c656374696f6e0000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600001243524300000000000000000000ffffffff3b0000000264620005000000746573740002636f6c6c656374696f6e000600000075736572730008454f4600011243524300000000000000000000ffffffff00").unwrap();
112 |         assert!(p.write(bytes.to_vec()).is_ok());
113 | 
114 |         // cleanup container
115 |         let _ = p.container.unwrap().rm();
116 | 
117 |         let mut p = get_invalid_mongodb();
118 |         assert!(p.init().is_err());
119 |         assert!(p.write(bytes.to_vec()).is_err());
120 |     }
121 | }
122 | 


--------------------------------------------------------------------------------
/website/docs/guides/deploy-replibyte/container.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Container
  3 | sidebar_position: 1
  4 | ---
  5 | 
  6 | # Deploy Replibyte as a container
  7 | 
  8 | You are using Replibyte on your local machine to [create](/docs/guides/create-a-dump) and [restore dumps](/docs/guides/restore-a-dump), it's great, but now you might want to deploy it close to your production and development environments to automate the process. This step-by-step guide explains how to do it and share you best practices.
  9 | 
 10 | :::note for qovery users
 11 | 
 12 | To deploy with [Qovery](https://www.qovery.com), follow [this guide](/docs/guides/deploy-replibyte/qovery)
 13 | 
 14 | :::
 15 | 
 16 | Here is a schema of what we are going to put in place.
 17 | 
 18 | ![schema Replibyte](/img/replibyte_dump_and_restore.jpg)
 19 | 
 20 | 1. In production:
 21 |    1. Replibyte periodically dump the production database and..
 22 |    2. upload a dump **without the sensitive data** on a S3 bucket.
 23 | 2. In development:
 24 |    1. Replibyte periodically restore the development database with the latest dump.
 25 | 
 26 | Let's go!
 27 | 
 28 | ## Run Replibyte container locally
 29 | 
 30 | ### Download the official Replibyte image
 31 | 
 32 | ```sh
 33 | docker pull ghcr.io/qovery/replibyte
 34 | ```
 35 | 
 36 | Check the [Github package](https://github.com/qovery/replibyte/pkgs/container/replibyte) for available tags (currently `latest` and per git release tag).
 37 | 
 38 | :::
 39 | 
 40 | ### Create Replibyte configuration file
 41 | 
 42 | I will take our final `conf.yaml` file from the ["create a dump"](/docs/guides/create-a-dump) guide and rename it into `replibyte.yaml`.
 43 | 
 44 | :::caution
 45 | 
 46 | You must name your replibyte configuration file `replibyte.yaml`. Otherwise, it will not work.
 47 | 
 48 | :::
 49 | 
 50 | ```yaml title="replibyte.yaml"
 51 | encryption_key: $ENCRYPTION_SECRET # put a secure secret here
 52 | source:
 53 |   connection_uri: $SOURCE_CONNECTION_URI
 54 |   transformers:
 55 |     - database: public
 56 |       table: customers
 57 |       columns:
 58 |         - name: first_name
 59 |           transformer_name: first-name
 60 |         - name: last_name
 61 |           transformer_name: random
 62 |         - name: contact_phone
 63 |           transformer_name: phone-number
 64 |         - name: contact_email
 65 |           transformer_name: email
 66 | datastore:
 67 |   aws:
 68 |     bucket: $S3_BUCKET
 69 |     region: $S3_REGION
 70 |     access_key_id: $S3_ACCESS_KEY_ID
 71 |     secret_access_key: $S3_SECRET_ACCESS_KEY
 72 | destination:
 73 |   connection_uri: $DESTINATION_CONNECTION_URI
 74 | ```
 75 | 
 76 | And set your environment variables in a file. You can leave secure environment variables empty so that they read from the shell environment.
 77 | 
 78 | ```sh
 79 | $ cat env.txt
 80 | S3_ACCESS_KEY_ID
 81 | S3_SECRET_ACCESS_KEY
 82 | S3_REGION=us-east-2
 83 | S3_BUCKET=my-test-bucket
 84 | SOURCE_CONNECTION_URI=postgres://...
 85 | DESTINATION_CONNECTION_URI=postgres://...
 86 | ENCRYPTION_SECRET
 87 | ```
 88 | 
 89 | ### Start the container
 90 | 
 91 | ```sh
 92 | docker run -it --name replibyte \
 93 |     --env-file env.txt \
 94 |     -v "$(pwd)/replibyte.yaml":/replibyte.yaml:ro \
 95 |     ghcr.io/qovery/replibyte \
 96 | ```
 97 | 
 98 | ## Running in a cloud environment
 99 | 
100 | ### Deploy with Qovery
101 | 
102 | ---
103 | 
104 | :::info
105 | 
106 | [Qovery](https://www.qovery.com) (the company behind Replibyte) is a platform used by more than 20 000 developers to deploy their apps on AWS in just a few seconds. Replibyte will be natively supported by Qovery in Q4 2022.
107 | 
108 | :::
109 | 
110 | To deploy Replibyte with Qovery - [here are the instructions](/docs/guides/deploy-replibyte/qovery).
111 | 
112 | ---
113 | 
114 | ### Self-hosted Deployment
115 | 
116 | This part depends on the platform (E.g Kubernetes, Docker Swarm, Nomad...) you use to deploy your containers. Basically, you just need to pull the container and run it with the right parameters.
117 | 
118 | ### Parameters for production
119 | 
120 | Here is the command line to dump the production
121 | 
122 | ```bash
123 | docker run -e S3_ACCESS_KEY_ID=XXX \
124 |            -e S3_SECRET_ACCESS_KEY=YYY \
125 |            -e S3_REGION=us-east-2 \
126 |            -e S3_BUCKET=my-test-bucket \
127 |            -e SOURCE_CONNECTION_URI=postgres://... \
128 |            -e DESTINATION_CONNECTION_URI=postgres://... \
129 |            -e ENCRYPTION_SECRET=itIsASecret \
130 |            ghcr.io/qovery/replibyte replibyte dump create
131 | ```
132 | 
133 | ### Parameters to seed development databases
134 | 
135 | Here is the command line to seed your development database with the latest production dump
136 | 
137 | ```bash
138 | docker run -e S3_ACCESS_KEY_ID=XXX \
139 |            -e S3_SECRET_ACCESS_KEY=YYY \
140 |            -e S3_REGION=us-east-2 \
141 |            -e S3_BUCKET=my-test-bucket \
142 |            -e SOURCE_CONNECTION_URI=postgres://... \
143 |            -e DESTINATION_CONNECTION_URI=postgres://... \
144 |            -e ENCRYPTION_SECRET=itIsASecret \
145 |            ghcr.io/qovery/replibyte replibyte dump restore remote -v latest
146 | ```
147 | 
148 | ---
149 | 
150 | Do you have any questions? Feel free to join the channel #replibyte on [our Discord server](https://discord.qovery.com).
151 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/keep_first_char.rs:
--------------------------------------------------------------------------------
  1 | use crate::transformer::Transformer;
  2 | use crate::types::Column;
  3 | 
  4 | pub struct KeepFirstCharTransformer {
  5 |     database_name: String,
  6 |     table_name: String,
  7 |     column_name: String,
  8 | }
  9 | 
 10 | impl KeepFirstCharTransformer {
 11 |     pub fn new<S>(database_name: S, table_name: S, column_name: S) -> Self
 12 |     where
 13 |         S: Into<String>,
 14 |     {
 15 |         KeepFirstCharTransformer {
 16 |             database_name: database_name.into(),
 17 |             table_name: table_name.into(),
 18 |             column_name: column_name.into(),
 19 |         }
 20 |     }
 21 | }
 22 | 
 23 | impl Default for KeepFirstCharTransformer {
 24 |     fn default() -> Self {
 25 |         KeepFirstCharTransformer {
 26 |             database_name: String::default(),
 27 |             table_name: String::default(),
 28 |             column_name: String::default(),
 29 |         }
 30 |     }
 31 | }
 32 | 
 33 | impl Transformer for KeepFirstCharTransformer {
 34 |     fn id(&self) -> &str {
 35 |         "keep-first-char"
 36 |     }
 37 | 
 38 |     fn description(&self) -> &str {
 39 |         "Keep only the first character of the column."
 40 |     }
 41 | 
 42 |     fn database_name(&self) -> &str {
 43 |         self.database_name.as_str()
 44 |     }
 45 | 
 46 |     fn table_name(&self) -> &str {
 47 |         self.table_name.as_str()
 48 |     }
 49 | 
 50 |     fn column_name(&self) -> &str {
 51 |         self.column_name.as_str()
 52 |     }
 53 | 
 54 |     fn database_and_table_and_column_name(&self) -> String {
 55 |         format!(
 56 |             "{}.{}.{}",
 57 |             self.database_name(),
 58 |             self.table_name(),
 59 |             self.column_name()
 60 |         )
 61 |     }
 62 | 
 63 |     fn transform(&self, column: Column) -> Column {
 64 |         match column {
 65 |             Column::NumberValue(column_name, value) => {
 66 |                 Column::NumberValue(column_name, get_first_digit(value))
 67 |             }
 68 |             Column::StringValue(column_name, value) => {
 69 |                 let new_value = match value.len() {
 70 |                     len if len > 1 => {
 71 |                         if let Some(first_char) = value.chars().nth(0) {
 72 |                             first_char.to_string()
 73 |                         } else {
 74 |                             "".to_string()
 75 |                         }
 76 |                     }
 77 |                     _ => value,
 78 |                 };
 79 | 
 80 |                 Column::StringValue(column_name, new_value)
 81 |             }
 82 |             column => column,
 83 |         }
 84 |     }
 85 | }
 86 | 
 87 | fn get_first_digit(mut number: i128) -> i128 {
 88 |     while number >= 10 {
 89 |         number /= 10;
 90 |     }
 91 | 
 92 |     number
 93 | }
 94 | 
 95 | #[cfg(test)]
 96 | mod tests {
 97 |     use crate::{transformer::Transformer, types::Column};
 98 | 
 99 |     use super::KeepFirstCharTransformer;
100 | 
101 |     #[test]
102 |     fn transform_keep_first_char_only_with_number_value() {
103 |         let transformer = get_transformer();
104 |         let column = Column::NumberValue("a_column".to_string(), 123);
105 |         let transformed_column = transformer.transform(column);
106 |         let transformed_value = transformed_column.number_value().unwrap();
107 |         assert_eq!(transformed_value.to_owned(), 1);
108 | 
109 |         let transformer = get_transformer();
110 |         let column = Column::NumberValue("a_column".to_string(), 1);
111 |         let transformed_column = transformer.transform(column);
112 |         let transformed_value = transformed_column.number_value().unwrap();
113 |         assert_eq!(transformed_value.to_owned(), 1);
114 |     }
115 | 
116 |     #[test]
117 |     fn transform_doesnt_change_with_float_value() {
118 |         let expected_value = 1.5;
119 |         let transformer = get_transformer();
120 |         let column = Column::FloatNumberValue("a_column".to_string(), expected_value);
121 |         let transformed_column = transformer.transform(column);
122 |         let transformed_value = transformed_column.float_number_value().unwrap();
123 | 
124 |         assert_eq!(transformed_value.to_owned(), expected_value);
125 |     }
126 | 
127 |     #[test]
128 |     fn transform_doesnt_change_with_empty_string_value() {
129 |         let expected_value = "";
130 |         let transformer = get_transformer();
131 |         let column = Column::StringValue("a_column".to_string(), expected_value.to_string());
132 |         let transformed_column = transformer.transform(column);
133 |         let transformed_value = transformed_column.string_value().unwrap();
134 |         assert_eq!(transformed_value, expected_value);
135 |     }
136 | 
137 |     #[test]
138 |     fn transform_keep_only_first_char_with_string_value() {
139 |         let transformer = get_transformer();
140 |         let column = Column::StringValue("a_column".to_string(), "Lucas".to_string());
141 |         let transformed_column = transformer.transform(column);
142 |         let transformed_value = transformed_column.string_value().unwrap();
143 |         assert_eq!(transformed_value, "L".to_string());
144 | 
145 |         let column = Column::StringValue("a_column".to_string(), "L".to_string());
146 |         let transformed_column = transformer.transform(column);
147 |         let transformed_value = transformed_column.string_value().unwrap();
148 |         assert_eq!(transformed_value, "L".to_string());
149 |     }
150 | 
151 |     fn get_transformer() -> KeepFirstCharTransformer {
152 |         KeepFirstCharTransformer::new("github", "users", "a_column")
153 |     }
154 | }
155 | 


--------------------------------------------------------------------------------
/.github/workflows/on-release.yml:
--------------------------------------------------------------------------------
  1 | on:
  2 |   release:
  3 |     types:
  4 |       - published
  5 | 
  6 | jobs:
  7 |   build-linux:
  8 |     runs-on: ubuntu-latest
  9 | 
 10 |     steps:
 11 |       - name: Checkout
 12 |         uses: actions/checkout@v1
 13 | 
 14 |       - name: Install latest rust toolchain
 15 |         uses: actions-rs/toolchain@v1
 16 |         with:
 17 |           toolchain: stable
 18 |           default: true
 19 |           override: true
 20 | 
 21 |       - name: Build for linux
 22 |         run: |
 23 |           docker run --rm \
 24 |             --volume "${PWD}":/root/src \
 25 |             --workdir /root/src \
 26 |             joseluisq/rust-linux-darwin-builder:1.60.0 \
 27 |             sh -c "cargo build --release"
 28 | 
 29 |       - name: Prepare release
 30 |         run: |
 31 |           cd target/x86_64-unknown-linux-musl/release
 32 |           EVENT_DATA=$(cat "$GITHUB_EVENT_PATH")
 33 |           RELEASE_NAME=$(echo "$EVENT_DATA" | jq -r .release.tag_name)
 34 |           FILE=replibyte_${RELEASE_NAME}_x86_64-unknown-linux-musl
 35 |           sudo tar -czvf ${FILE}.tar.gz replibyte && sudo rm replibyte
 36 |           sudo touch ${FILE}.tar.gz.sha256sum && sudo chmod 777 ${FILE}.tar.gz.sha256sum
 37 |           sudo sha256sum "${FILE}.tar.gz" | cut -d ' ' -f 1 > ${FILE}.tar.gz.sha256sum
 38 | 
 39 |       - name: Release
 40 |         uses: softprops/action-gh-release@v1
 41 |         with:
 42 |           files: |
 43 |             target/x86_64-unknown-linux-musl/release/replibyte_*
 44 |         env:
 45 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 46 | 
 47 |   build-windows:
 48 |     runs-on: ubuntu-latest
 49 | 
 50 |     steps:
 51 |       - name: Checkout
 52 |         uses: actions/checkout@v1
 53 | 
 54 |       - name: Install latest rust toolchain
 55 |         uses: actions-rs/toolchain@v1
 56 |         with:
 57 |           toolchain: stable
 58 |           default: true
 59 |           target: x86_64-pc-windows-gnu
 60 |           override: true
 61 | 
 62 |       - name: Build for windows
 63 |         run: |
 64 |           sudo apt-get update && sudo apt-get upgrade -y
 65 |           sudo apt-get install -y g++-mingw-w64-x86-64
 66 |           cargo build --all --release --target x86_64-pc-windows-gnu
 67 | 
 68 |       - name: Prepare release
 69 |         run: |
 70 |           cd target/x86_64-pc-windows-gnu/release
 71 |           EVENT_DATA=$(cat "$GITHUB_EVENT_PATH")
 72 |           RELEASE_NAME=$(echo "$EVENT_DATA" | jq -r .release.tag_name)
 73 |           FILE=replibyte_${RELEASE_NAME}_x86_64-pc-windows-gnu.exe
 74 |           sudo zip -9r ${FILE}.zip replibyte.exe && sudo rm replibyte.exe
 75 |           sudo touch ${FILE}.zip.sha256sum && sudo chmod 777 ${FILE}.zip.sha256sum
 76 |           sudo sha256sum "${FILE}.zip" | cut -d ' ' -f 1 > ${FILE}.zip.sha256sum
 77 | 
 78 |       - name: Release
 79 |         uses: softprops/action-gh-release@v1
 80 |         with:
 81 |           files: target/x86_64-pc-windows-gnu/release/replibyte_*
 82 |         env:
 83 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 84 | 
 85 |   build-mac:
 86 |     runs-on: ubuntu-latest
 87 |     outputs:
 88 |       sha256sum: ${{ steps.prep.outputs.sha256sum }}
 89 |     steps:
 90 |       - name: Checkout
 91 |         uses: actions/checkout@v1
 92 | 
 93 |       - name: Install latest rust toolchain
 94 |         uses: actions-rs/toolchain@v1
 95 |         with:
 96 |           toolchain: stable
 97 |           target: x86_64-apple-darwin
 98 |           default: true
 99 |           override: true
100 | 
101 |       - name: Build for mac
102 |         run: |
103 |           docker run --rm \
104 |             --volume "${PWD}":/root/src \
105 |             --workdir /root/src \
106 |             joseluisq/rust-linux-darwin-builder:1.60.0 \
107 |             sh -c "CC=o64-clang CXX=o64-clang++ cargo build --release --target x86_64-apple-darwin"
108 | 
109 |       - id: prep
110 |         name: Prepare release
111 |         run: |
112 |           cd target/x86_64-apple-darwin/release
113 |           EVENT_DATA=$(cat "$GITHUB_EVENT_PATH")
114 |           RELEASE_NAME=$(echo "$EVENT_DATA" | jq -r .release.tag_name)
115 |           FILE=replibyte_${RELEASE_NAME}_x86_64-apple-darwin
116 |           sudo zip -9r ${FILE}.zip replibyte && sudo rm replibyte
117 |           sudo touch ${FILE}.zip.sha256sum && sudo chmod 777 ${FILE}.zip.sha256sum
118 |           CHECKSUM=$(sudo sha256sum "${FILE}.zip" | cut -d ' ' -f 1)
119 |           echo "${CHECKSUM}" > ${FILE}.zip.sha256sum
120 |           printf "::set-output name=%s::%s\n" sha256sum "${CHECKSUM}"
121 | 
122 |       - name: Release
123 |         uses: softprops/action-gh-release@v1
124 |         with:
125 |           files: |
126 |             target/x86_64-apple-darwin/release/replibyte_*
127 |         env:
128 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
129 | 
130 |   publish-on-homebrew:
131 |     runs-on: ubuntu-latest
132 |     needs: build-mac
133 |     steps:
134 |       - name: Extract version and sha256sum
135 |         id: extract
136 |         run: |
137 |           printf "::set-output name=%s::%s\n" tag-name "${GITHUB_REF#refs/tags/}"
138 |           printf "::set-output name=%s::%s\n" sha256sum "${{ needs.build-mac.outputs.sha256sum }}"
139 |       - uses: mislav/bump-homebrew-formula-action@v2
140 |         if: "!contains(github.ref, '-')" # skip prereleases
141 |         with:
142 |           formula-name: replibyte
143 |           homebrew-tap: Qovery/homebrew-replibyte
144 |           download-url: https://github.com/Qovery/replibyte/releases/download/${{ steps.extract.outputs.tag-name }}/replibyte_${{ steps.extract.outputs.tag-name }}_x86_64-apple-darwin.zip
145 |           download-sha256: ${{ steps.extract.outputs.sha256sum }}
146 |         env:
147 |           COMMITTER_TOKEN: ${{ secrets.PERSONAL_TOKEN }}
148 | 


--------------------------------------------------------------------------------
/replibyte/src/transformer/redacted.rs:
--------------------------------------------------------------------------------
  1 | use serde::{Deserialize, Serialize};
  2 | 
  3 | use crate::transformer::Transformer;
  4 | use crate::types::Column;
  5 | 
  6 | /// This struct is dedicated to redact a string with a specific character (default to '*').
  7 | pub struct RedactedTransformer {
  8 |     database_name: String,
  9 |     table_name: String,
 10 |     column_name: String,
 11 |     options: RedactedTransformerOptions,
 12 | }
 13 | 
 14 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
 15 | pub struct RedactedTransformerOptions {
 16 |     pub character: char,
 17 |     pub width: u8,
 18 | }
 19 | 
 20 | impl Default for RedactedTransformerOptions {
 21 |     fn default() -> Self {
 22 |         RedactedTransformerOptions {
 23 |             character: '*',
 24 |             width: 10,
 25 |         }
 26 |     }
 27 | }
 28 | 
 29 | impl RedactedTransformer {
 30 |     pub fn new<S>(
 31 |         database_name: S,
 32 |         table_name: S,
 33 |         column_name: S,
 34 |         options: RedactedTransformerOptions,
 35 |     ) -> Self
 36 |     where
 37 |         S: Into<String>,
 38 |     {
 39 |         RedactedTransformer {
 40 |             database_name: database_name.into(),
 41 |             table_name: table_name.into(),
 42 |             column_name: column_name.into(),
 43 |             options,
 44 |         }
 45 |     }
 46 | }
 47 | 
 48 | impl Default for RedactedTransformer {
 49 |     fn default() -> Self {
 50 |         RedactedTransformer {
 51 |             database_name: String::default(),
 52 |             table_name: String::default(),
 53 |             column_name: String::default(),
 54 |             options: RedactedTransformerOptions::default(),
 55 |         }
 56 |     }
 57 | }
 58 | 
 59 | impl Transformer for RedactedTransformer {
 60 |     fn id(&self) -> &str {
 61 |         "redacted"
 62 |     }
 63 | 
 64 |     fn description(&self) -> &str {
 65 |         "Obfuscate your sensitive data (string only). [4242 4242 4242 4242]->[424****************]"
 66 |     }
 67 | 
 68 |     fn database_name(&self) -> &str {
 69 |         self.database_name.as_str()
 70 |     }
 71 | 
 72 |     fn table_name(&self) -> &str {
 73 |         self.table_name.as_str()
 74 |     }
 75 | 
 76 |     fn column_name(&self) -> &str {
 77 |         self.column_name.as_str()
 78 |     }
 79 | 
 80 |     fn transform(&self, column: Column) -> Column {
 81 |         match column {
 82 |             Column::StringValue(column_name, value) => {
 83 |                 let new_value = match value.len() {
 84 |                     len if len > 3 => {
 85 |                         format!(
 86 |                             "{}{}",
 87 |                             value.chars().take(3).collect::<String>(),
 88 |                             self.options
 89 |                                 .character
 90 |                                 .to_string()
 91 |                                 .repeat(self.options.width.into())
 92 |                         )
 93 |                     }
 94 |                     _ => value,
 95 |                 };
 96 |                 Column::StringValue(column_name, new_value)
 97 |             }
 98 |             column => column,
 99 |         }
100 |     }
101 | }
102 | 
103 | #[cfg(test)]
104 | mod tests {
105 |     use crate::{transformer::Transformer, types::Column};
106 | 
107 |     use super::{RedactedTransformer, RedactedTransformerOptions};
108 | 
109 |     #[test]
110 |     fn redact() {
111 |         let transformer = get_transformer();
112 |         let column = Column::StringValue(
113 |             "credit_card_number".to_string(),
114 |             "4242 4242 4242 4242".to_string(),
115 |         );
116 |         let transformed_column = transformer.transform(column);
117 |         let transformed_value = transformed_column.string_value().unwrap();
118 |         assert_eq!(transformed_value.to_owned(), "424**********")
119 |     }
120 | 
121 |     #[test]
122 |     fn redact_with_multi_byte_char() {
123 |         let transformer = get_transformer();
124 |         let column = Column::StringValue(
125 |             "multi_byte_column".to_string(),
126 |             "🦀ë池cd".to_string(),
127 |         );
128 |         let transformed_column = transformer.transform(column);
129 |         let transformed_value = transformed_column.string_value().unwrap();
130 |         assert_eq!(transformed_value.to_owned(), "🦀ë池**********")
131 |     }
132 | 
133 |     #[test]
134 |     fn strings_lower_than_3_chars_remains_visible() {
135 |         let transformer = get_transformer();
136 |         let column = Column::StringValue("credit_card_number".to_string(), "424".to_string());
137 |         let transformed_column = transformer.transform(column);
138 |         let transformed_value = transformed_column.string_value().unwrap();
139 |         assert_eq!(transformed_value.to_owned(), "424")
140 |     }
141 | 
142 |     #[test]
143 |     fn redact_with_custom_char() {
144 |         let transformer = RedactedTransformer::new(
145 |             "github",
146 |             "users",
147 |             "credit_card_number",
148 |             RedactedTransformerOptions {
149 |                 character: '#',
150 |                 width: 20,
151 |             },
152 |         );
153 |         let column = Column::StringValue(
154 |             "credit_card_number".to_string(),
155 |             "4242 4242 4242 4242".to_string(),
156 |         );
157 |         let transformed_column = transformer.transform(column);
158 |         let transformed_value = transformed_column.string_value().unwrap();
159 |         assert_eq!(transformed_value.to_owned(), "424####################")
160 |     }
161 | 
162 |     fn get_transformer() -> RedactedTransformer {
163 |         RedactedTransformer::new(
164 |             "github",
165 |             "users",
166 |             "credit_card_number",
167 |             RedactedTransformerOptions::default(),
168 |         )
169 |     }
170 | }
171 | 


--------------------------------------------------------------------------------
/website/docs/getting-started/configuration.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | sidebar_position: 4
  3 | ---
  4 | 
  5 | # Configuration
  6 | 
  7 | Create your `conf.yaml` configuration file to source your production database.
  8 | 
  9 | ```yaml
 10 | encryption_key: $MY_PRIVATE_ENC_KEY # optional - encrypt data on datastore
 11 | source:
 12 |   connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL
 13 | datastore:
 14 |   aws:
 15 |     bucket: $BUCKET_NAME
 16 |     region: $S3_REGION
 17 |     credentials:
 18 |       access_key_id: $ACCESS_KEY_ID
 19 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
 20 | destination:
 21 |   connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL
 22 | ```
 23 | 
 24 | :::info
 25 | 
 26 | Environment variables are substituted by their value at runtime. An error is thrown if the environment variable does not exist.
 27 | 
 28 | :::
 29 | 
 30 | Run the app for the source:
 31 | 
 32 | ```shell
 33 | replibyte -c conf.yaml
 34 | ```
 35 | 
 36 | ## Source and Destination
 37 | 
 38 | Replibyte supports multiple databases.
 39 | 
 40 | - [PostgreSQL](/docs/databases#postgresql)
 41 | - [MySQL](/docs/databases#mysql)
 42 | - [MongoDB](/docs/databases#mongodb)
 43 | 
 44 | ## Transformer
 45 | 
 46 | A transformer is useful to change/hide the value of a specified column. Replibyte provides pre-made transformers. You can
 47 | also [build your own Transformer in web assembly](/docs/transformers#wasm).
 48 | 
 49 | Here is a list of all the [transformers available](/docs/transformers).
 50 | 
 51 | | id              | description                                                                                        | doc                                             |
 52 | |-----------------|----------------------------------------------------------------------------------------------------|-------------------------------------------------|
 53 | | transient       | Does not modify the value                                                                          | [link](/docs/transformers#transient)            |
 54 | | random          | Randomize value but keep the same length (string only). [AAA]->[BBB]                               | [link](/docs/transformers#random)               |
 55 | | first-name      | Replace the string value by a first name                                                           | [link](/docs/transformers#first-name)           |
 56 | | email           | Replace the string value by an email address                                                       | [link](/docs/transformers#email)                |
 57 | | keep-first-char | Keep only the first char for strings and digit for numbers                                         | [link](/docs/transformers#keep-first-character) |
 58 | | phone-number    | Replace the string value by a phone number                                                         | [link](/docs/transformers#phone-number)         |
 59 | | credit-card     | Replace the string value by a credit card number                                                   | [link](/docs/transformers#credit-card)          |
 60 | | redacted        | Obfuscate your sensitive data (>3 characters strings only). [4242 4242 4242 4242]->[424**********] | [link](/docs/transformers#redacted)             |
 61 | 
 62 | ## Datastore
 63 | 
 64 | A Datastore is where Replibyte store the created dump to make them accessible from the destination databases.
 65 | 
 66 | | Cloud Service Provider | S3 service name                                                           | S3 compatible  |
 67 | |------------------------|---------------------------------------------------------------------------|----------------|
 68 | | Amazon Web Services    | [S3](https://aws.amazon.com/s3/)                                          | Yes (Original) |
 69 | | Google Cloud Platform  | [Cloud Storage](https://cloud.google.com/storage)                         | Yes            |
 70 | | Microsoft Azure        | [Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) | Yes            |
 71 | | Digital Ocean          | [Spaces](https://www.digitalocean.com/products/spaces)                    | Yes            |
 72 | | Scaleway               | [Object Storage](https://www.scaleway.com/en/object-storage/)             | Yes            |
 73 | | Minio                  | [Object Storage](https://min.io/)                                         | Yes            |
 74 | 
 75 | :::info
 76 | 
 77 | Any datastore compatible with the S3 protocol is a valid datastore.
 78 | 
 79 | :::
 80 | 
 81 | ## Example
 82 | 
 83 | Here is a configuration file including some transformations and different options like the database subset.
 84 | 
 85 | ```yaml
 86 | encryption_key: $MY_PRIVATE_ENC_KEY # optional - encrypt data on datastore
 87 | source:
 88 |   connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL
 89 |   database_subset: # optional - downscale database while keeping it consistent
 90 |     database: public
 91 |     table: orders
 92 |     strategy_name: random
 93 |     strategy_options:
 94 |       percent: 50
 95 |     passthrough_tables:
 96 |       - us_states
 97 |   transformers: # optional - hide sensitive data
 98 |     - database: public
 99 |       table: employees
100 |       columns:
101 |         - name: last_name
102 |           transformer_name: random
103 |         - name: birth_date
104 |           transformer_name: random-date
105 |         - name: first_name
106 |           transformer_name: first-name
107 |         - name: email
108 |           transformer_name: email
109 |         - name: username
110 |           transformer_name: keep-first-char
111 |     - database: public
112 |       table: customers
113 |       columns:
114 |         - name: phone
115 |           transformer_name: phone-number
116 |   only_tables: # optional - dumps only specified tables.
117 |     - database: public
118 |       table: orders
119 |     - database: public
120 |       table: customers
121 | datastore:
122 |   aws:
123 |     bucket: $BUCKET_NAME
124 |     region: $S3_REGION
125 |     credentials:
126 |       access_key_id: $ACCESS_KEY_ID
127 |       secret_access_key: $AWS_SECRET_ACCESS_KEY
128 | destination:
129 |   connection_uri: postgres://user:password@host:port/db # you can use $DATABASE_URL
130 | ```
131 | 


--------------------------------------------------------------------------------