├── .dockerignore
├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── Dockerfile
├── LICENSE
├── README.md
├── appveyor.yml
├── circle.yml
├── config
│   └── default.toml
├── rustfmt.toml
└── src
    ├── api_client.rs
    ├── db_client.rs
    ├── errors.rs
    ├── importer.rs
    ├── main.rs
    ├── mysql_pool
    │   ├── mod.rs
    │   └── pool.rs
    ├── settings.rs
    └── type_converter.rs

/.dockerignore:
--------------------------------------------------------------------------------
1 | target/
2 | **/*.rs.bk
3 | config/local.toml
4 | Cargo.lock
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Please read the following carefully before opening a new issue.
2 | Your issue may be closed if it does not provide the information required by this template.
3 | 
4 | We use GitHub Issues, as well as our help email at canvasdatahelp@instructure.com, for tracking issues. Note that while this project is not officially supported by Instructure, the Canvas Data support team will ensure it stays in a working state.
5 | 
6 | - If you have a question about how to use the CLI, please ask your CSM or email canvasdatahelp@instructure.com.
7 | - If you have a feature request, please post it on the community site: [HERE][community_link]
8 | 
9 | - Make sure your issue reproduces on the latest version!
10 | 
11 | --- Delete everything above this line ---
12 | 
13 | ### Description ###
14 | 
15 | Explain what you did, what you expected to happen, and what actually happened.
16 | 
17 | ### Additional Information ###
18 | 
19 | * Rust Version: [FILL THIS OUT: can be grabbed with `rustc --version` on your CLI.]
20 | * Platform: [FILL THIS OUT: Windows, Mac, or Linux? Which version?]
21 | * Logs: (If you can, please run the CLI with `RUST_LOG=trace` set and provide us the debug logs.)
22 | 
23 | [community_link]: https://community.canvaslms.com/community/answers/data
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Thanks for submitting a PR! We want to make contributing to the Canvas Data Loader as easy as possible.
2 | Please read these instructions carefully:
3 | 
4 | - [ ] Explain the **motivation** for making this change.
5 | - [ ] Provide a **test plan** demonstrating that the code is solid.
6 | - [ ] Match the **code formatting** of the rest of the codebase.
7 | - [ ] Make sure to **add tests** to help keep code coverage up.
8 | 
9 | ## Motivation (required) ##
10 | 
11 | What existing problem does the pull request solve?
12 | 
13 | ## Test Plan (required) ##
14 | 
15 | A good test plan includes the exact commands you ran and their output.
16 | 
17 | If you have added code that should be tested, add tests.
18 | 
19 | ## Next Steps ##
20 | 
21 | - Small pull requests are much easier to review and more likely to get merged. Make sure the PR does only one thing; otherwise, please split it.
22 | - Make sure all **tests pass**. We will run automated tests, but you can run them yourself with `cargo test`.
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | **/*.rs.bk 3 | config/local.toml 4 | Cargo.lock -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.3.0 2 | 3 | * Add Option to only import latest files. 4 | * Update Dependencies. 5 | * Add Comments. 6 | 7 | ## 0.2.0 8 | 9 | * Support for MySQL Databases. 10 | 11 | ## 0.1.0 12 | 13 | * Initial Release of Canvas Data Loader. 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cdl-runner" 3 | version = "0.4.0" 4 | authors = [ 5 | "Canvas Data Support Team ", 6 | "Instructure Engineering ", 7 | "Eric Coan " 8 | ] 9 | 10 | [features] 11 | default = [ "postgres_compat", "mysql_compat" ] 12 | postgres_compat = [ "postgres", "r2d2_postgres" ] 13 | mysql_compat = [ "mysql" ] 14 | 15 | [dependencies] 16 | base64 = "^0.9" 17 | chrono = { version = "^0.4", features = [ "serde" ] } 18 | config = "^0.9" 19 | error-chain = "^0.12" 20 | env_logger = "^0.5" 21 | flate2 = { version = "^1.0", features = ["zlib"], default-features = false } 22 | futures = "^0.1" 23 | log = "^0.4" 24 | glob = "^0.2" 25 | lazy_static = "^1.1" 26 | r2d2 = "^0.8" 27 | rayon = "^1.0.2" 28 | regex = "^1.0" 29 | reqwest = "^0.9" 30 | ring = "^0.13" 31 | rocksdb = "^0.10" 32 | serde = "^1.0" 33 | serde_derive = "^1.0" 34 | serde_json = "^1.0" 35 | tokio-core = "^0.1" 36 | 37 | # Postgres Deps 38 | postgres = { version = "^0.15", optional = true } 39 | r2d2_postgres = { version = "^0.14", optional = true } 40 | 41 | # Mysql Deps 42 | mysql = { version = "^14", optional = true, features = ["ssl"] } 43 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:latest as cdl-build 2 | 3 | RUN apt-get update && apt-get -y --no-install-recommends install clang && apt-get clean && rm -rf /var/lib/apt/lists/* 4 | 5 | # Cache dependencies for faster builds 6 | RUN cargo install cargo-build-deps 7 | RUN cd /tmp && USER=root cargo new --bin canvas-data-loader 8 | WORKDIR /tmp/canvas-data-loader 9 | COPY Cargo.toml ./ 10 | RUN cargo-build-deps --release 11 | 12 | # Copy in our source and build it 13 | COPY src /tmp/canvas-data-loader/src 14 | RUN cargo build --release 15 | 16 | # Start a new build from a minimal image that we can copy the binary into 17 | FROM debian:stretch-slim 18 | 19 | RUN apt-get update && apt-get -y --no-install-recommends install libssl1.1 ca-certificates && apt-get clean && rm -rf /var/lib/apt/lists/* 20 | COPY --from=cdl-build /tmp/canvas-data-loader/target/release/cdl-runner . 
21 | COPY ./config ./config
22 | 
23 | ENV RUST_LOG info
24 | CMD ./cdl-runner
25 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 | 
3 | Copyright (c) 2017 Instructure
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Canvas Data Loader #
2 | 
3 | This is the source code for the Canvas Data Loader, an example application that downloads your
4 | Canvas Data dumps and imports them into a database. The process is completely automated and is
5 | able to handle historical refreshes, schema changes, and the 24-36 hour variance in dump delivery
6 | without issue.
7 | 
8 | It should be noted that although there are better options out there, there is no reason you couldn't
9 | use the loader to handle all of your imports every day. The Canvas Data Loader could, for example,
10 | handle your imports at first, and later be handed off to a more stable process.
11 | 
12 | ## Support ##
13 | 
14 | Although this lives under the Instructure repo, it is purely an example application, and as such is not fully supported by Instructure.
15 | 
16 | However, the Canvas Data Support team is happy to field questions about usage at the standard canvasdatahelp@instructure.com address.
17 | 
18 | ## How Do I Use It? ##
19 | 
20 | The following instructions are for a Linux server, but steps 1-5 should work universally.
21 | You'll just need to use your system's way of scheduling a repeating task instead of cron if you
22 | are not using Linux.
23 | 
24 | * Clone this repository.
25 | * Copy the default configuration, and modify it to your needs:
26 |   * `cp ./config/default.toml ./config/local.toml`
27 |   * `my_text_editor ./config/local.toml`
28 | * Choose a home for the importer, and copy this repository there.
29 | * [Install Rust](https://www.rust-lang.org/en-US/install.html)
30 | * Build a release version: `cargo build --release`.
31 | * Set up a crontab to run the importer every hour:
32 |   * `crontab -e`
33 |   * Enter on its own line, filling in the path to your importer: `0 * * * * cd <path_to_importer> && RUST_LOG=info ./target/release/cdl-runner > /var/log/cdl-log 2>&1`
34 | * Tadah!
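For a rough sense of what each scheduled run actually does, the sketch below strings together the crate's own `Settings`, `CanvasDataApiClient`, `DatabaseClient`, and `Importer` types (all shown later in `src/`). It is illustrative only: the `run_once` name is hypothetical, the Postgres adapter is hard-coded where the real `main.rs` picks the adapter from `database.db_type`, and the bookkeeping around schema versions and already-imported dumps is omitted.

```rust
// Illustrative sketch only; not the literal main.rs. `run_once` is a
// hypothetical helper that shows roughly what one cdl-runner invocation does.
fn run_once() -> errors::Result<()> {
  // Read the configuration (default.toml / local.toml / environment overrides).
  let settings = settings::Settings::new();
  let api_client = api_client::CanvasDataApiClient::new(&settings);

  // Postgres is used here for illustration; the real code chooses the adapter
  // based on the `db_type` setting.
  let db = db_client::DatabaseClient::<r2d2_postgres::PostgresConnectionManager>::new(&settings)?;

  // List every dump the account can see, oldest first, and import the
  // finished ones in order.
  let mut dumps = api_client.get_dumps()?;
  dumps.sort_by(|a, b| a.created_at.cmp(&b.created_at));
  for dump in dumps.into_iter().filter(|d| d.finished) {
    importer::Importer::new(
      api_client.clone(),
      db.clone(),
      dump.dump_id.clone(),
      settings.get_save_location(),
    ).process(false)?; // `false` keeps per-table diffing; see `all_tables_volatile`.
  }
  Ok(())
}
```

Each `Importer::process` call downloads the dump's gzipped TSV files, creates any missing tables from the published schema, and then either diffs rows in place or drops and reloads the table, depending on whether it is in the volatile-table list.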
35 | 
36 | ### Configuration Using Environment Variables
37 | 
38 | Configuration can also be done using environment variables instead of, or in addition to, the `./config/local.toml` file. For example, you may wish to use environment variables for the API key/secret and use the file for the remaining configuration.
39 | 
40 | Example:
41 | 
42 | `export cdl__canvasdataauth__api_key=abcdefg123456`
43 | `export cdl__canvasdataauth__api_secret=123456abcdefg`
44 | 
45 | Possible environment variables:
46 | 
47 | - `cdl__canvasdataauth__api_key`
48 | - `cdl__canvasdataauth__api_secret`
49 | - `cdl__database__db_type`
50 | - `cdl__database__url`
51 | - `cdl__only_load_final`
52 | - `cdl__rocksdb_location`
53 | - `cdl__save_location`
54 | - `cdl__skip_historical_imports`
55 | 
56 | ## License ##
57 | 
58 | The Canvas Data Loader is licensed under the MIT License.
59 | 
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | # Appveyor configuration template for Rust using rustup for Rust installation
2 | # https://github.com/starkat99/appveyor-rust
3 | 
4 | ## Operating System (VM environment) ##
5 | 
6 | # Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
7 | os: Visual Studio 2015
8 | 
9 | ## Build Matrix ##
10 | environment:
11 |   matrix:
12 | 
13 | ### MSVC Toolchains ###
14 | 
15 |   # Stable 64-bit MSVC
16 |   - channel: stable
17 |     target: x86_64-pc-windows-msvc
18 |   # Stable 32-bit MSVC
19 |   - channel: stable
20 |     target: i686-pc-windows-msvc
21 | 
22 | ### GNU Toolchains ###
23 | 
24 |   # Stable 64-bit GNU
25 |   - channel: stable
26 |     target: x86_64-pc-windows-gnu
27 |   # Stable 32-bit GNU
28 |   - channel: stable
29 |     target: i686-pc-windows-gnu
30 | 
31 | ## Install Script ##
32 | install:
33 |   - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
34 |   - rustup-init -yv --default-toolchain %channel% --default-host %target%
35 |   - set PATH=%PATH%;%USERPROFILE%\.cargo\bin
36 |   - rustc -vV
37 |   - cargo -vV
38 | 
39 | ## Build Script ##
40 | build: false
41 | test_script:
42 |   - cargo build --verbose %cargoflags%
--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | executorType: docker
4 | containerInfo:
5 |   - image: jimmycuadra/rust:latest
6 | stages:
7 |   build:
8 |     workDir: /source
9 |     steps:
10 |       - type: shell
11 |         shell: /bin/bash
12 |         command: apt-get update
13 |       - type: shell
14 |         shell: /bin/bash
15 |         command: apt-get install openssh-client -y
16 |       - type: checkout
17 |       - type: shell
18 |         shell: /bin/bash
19 |         command: cargo build
20 | 
--------------------------------------------------------------------------------
/config/default.toml:
--------------------------------------------------------------------------------
1 | # Locations for downloaded dumps and the local RocksDB state store.
2 | save_location = "/tmp/cdl-save"
3 | rocksdb_location = "/tmp/cdl-rocksdb"
4 | skip_historical_imports = true
5 | # Uncomment the line below to only import the latest dump. Note: you may miss dumps on
6 | # days where we do historical refreshes. It's good for a first import.
7 | # only_load_final = true
8 | 
9 | # Uncomment the line below to value import speed over availability of data.
10 | # Specifically, the CDL will drop tables and reimport them from scratch every time,
11 | # no matter the table, instead of doing targeted DELETE/INSERTs.
12 | # all_tables_volatile = true 13 | 14 | [canvasdataauth] 15 | api_key = "FILL_ME_OUT" 16 | api_secret = "FILL_ME_OUT" 17 | 18 | [database] 19 | url = "postgres://localhost/canvas_data_loader" 20 | # Valid Values are Psql, Mysql 21 | db_type = "Psql" 22 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 2 2 | max_width = 120 -------------------------------------------------------------------------------- /src/api_client.rs: -------------------------------------------------------------------------------- 1 | //! Provides an API Client for the Canvas Data API. 2 | 3 | use base64::encode as B64Encode; 4 | use chrono::prelude::*; 5 | use errors::*; 6 | use rayon::prelude::*; 7 | use regex::Regex; 8 | use reqwest::{Client as HttpClient, Method, Request}; 9 | use reqwest::header::HeaderValue; 10 | use ring::{digest, hmac}; 11 | use settings::Settings; 12 | use std::collections::BTreeMap; 13 | use std::fs::{self, File}; 14 | use std::io; 15 | use std::path::Path; 16 | 17 | lazy_static! { 18 | static ref REQREG: Regex = Regex::new(r"^requests.*?$").expect("Invalid Static Requests Regex"); 19 | } 20 | 21 | /// The API Client for Canvas Data. 22 | #[derive(Clone)] 23 | pub struct CanvasDataApiClient { 24 | /// The API Key to use for Canvas Data. 25 | api_key: String, 26 | /// The API Secret to use for Canvas Data. 27 | api_secret: String, 28 | /// The place to save files. 29 | save_location: String, 30 | /// The Reqwest Client, 31 | client: HttpClient, 32 | } 33 | 34 | impl CanvasDataApiClient { 35 | /// Creates a new Canvas Data API Client. 36 | /// 37 | /// Creates a Canvas Data API Client that talks to the core portal.inshosteddata.com. 38 | /// 39 | /// * `settings` - The settings to use for this API Client. 40 | pub fn new(settings: &Settings) -> Self { 41 | CanvasDataApiClient { 42 | api_key: settings.get_canvas_data_api_key(), 43 | api_secret: settings.get_canvas_data_api_secret(), 44 | save_location: settings.get_save_location(), 45 | client: HttpClient::new(), 46 | } 47 | } 48 | 49 | /// Computes the authorization header. 50 | /// 51 | /// Computes the authorization header needed for authenticating to the Canvas Data API. 52 | /// 53 | /// * `http_method` - The HTTP Method you're using. 54 | /// * `host` - The Host Header you're using. 55 | /// * `content_type` - The Content Type you're using. 56 | /// * `content_md5` - The Content MD5 Header you're sending. 57 | /// * `path` - The path of your request. 58 | /// * `query_params` - The query parameters of your request. 59 | /// * `date_header` - The Date Header you're using. 60 | pub fn compute_auth_header( 61 | &self, 62 | http_method: &str, 63 | host: &str, 64 | content_type: &str, 65 | content_md5: &str, 66 | path: &str, 67 | query_params: &str, 68 | date_header: &str, 69 | ) -> String { 70 | 71 | let pre_sign = 72 | format!( 73 | "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}", 74 | http_method, 75 | host, 76 | content_type, 77 | content_md5, 78 | path, 79 | query_params, 80 | date_header, 81 | self.api_secret, 82 | ); 83 | debug!("Compute Auth Header was passed: {:?}", pre_sign); 84 | 85 | let signing_key = hmac::SigningKey::new(&digest::SHA256, self.api_secret.clone().as_bytes()); 86 | let output = hmac::sign(&signing_key, pre_sign.as_bytes()); 87 | let encoded_val = B64Encode(&output); 88 | format!("HMACAuth {}:{}", self.api_key, encoded_val) 89 | } 90 | 91 | /// Gets the current date. 
92 | /// 93 | /// Gets the current date in the format needed for compute_auth_header. 94 | pub fn get_current_date(&self) -> String { 95 | Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string() 96 | } 97 | 98 | /// Determines if a dump is a historical refresh based on the files in dump response. 99 | /// 100 | /// * `resp` - The Files in dump response to check. 101 | pub fn is_historical_refresh(&self, resp: FilesInDumpResponse) -> bool { 102 | let mut has_found_all_requests_table = true; 103 | 'outer: for artifact in resp.artifacts_by_table.values() { 104 | for file in artifact.files.iter() { 105 | if !REQREG.is_match(&file.filename) { 106 | has_found_all_requests_table = false; 107 | break 'outer; 108 | } 109 | } 110 | } 111 | has_found_all_requests_table 112 | } 113 | 114 | /// Gets a current list of Dumps for your Canvas Data Instance. 115 | pub fn get_dumps(&self) -> Result> { 116 | trace!("Get Dumps was called."); 117 | 118 | let uri = try!("https://portal.inshosteddata.com/api/account/self/dump".parse()); 119 | let mut req: Request = Request::new(Method::GET, uri); 120 | let date_str = self.get_current_date(); 121 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Couldn't turn string into header value!")); 122 | req.headers_mut().insert( 123 | "Content-Type", 124 | HeaderValue::from_static("application/json"), 125 | ); 126 | req.headers_mut().insert( 127 | "Authorization", 128 | HeaderValue::from_str(&self.compute_auth_header( 129 | "GET", 130 | "portal.inshosteddata.com", 131 | "application/json", 132 | "", 133 | "/api/account/self/dump", 134 | "", 135 | &date_str, 136 | )).expect("Couldn't turn string into header value!"), 137 | ); 138 | 139 | Ok(try!(self.client.execute(req).and_then(|mut res| { 140 | res.json() 141 | }).map_err(|e| { 142 | io::Error::new(io::ErrorKind::Other, e) 143 | }))) 144 | } 145 | 146 | /// Gets the latest schema. 147 | pub fn get_latest_schema(&self) -> Result { 148 | trace!("Get latest schema was called"); 149 | 150 | let uri = try!("https://portal.inshosteddata.com/api/schema/latest".parse()); 151 | let mut req: Request = Request::new(Method::GET, uri); 152 | let date_str = self.get_current_date(); 153 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Failed to turn string into header value!")); 154 | req.headers_mut().insert( 155 | "Content-Type", 156 | HeaderValue::from_static("application/json"), 157 | ); 158 | req.headers_mut().insert( 159 | "Authorization", 160 | HeaderValue::from_str(&self.compute_auth_header( 161 | "GET", 162 | "portal.inshosteddata.com", 163 | "application/json", 164 | "", 165 | "/api/schema/latest", 166 | "", 167 | &date_str, 168 | )).expect("Failed to turn string into header value!"), 169 | ); 170 | 171 | Ok(try!(self.client.execute(req).and_then(|mut res| { 172 | res.json() 173 | }).map_err(|e| { 174 | io::Error::new(io::ErrorKind::Other, e) 175 | }))) 176 | } 177 | 178 | /// Gets the Table Definition for a Specific Table. 179 | /// 180 | /// * `table_name` - The Table name to get the definition for. 
181 | pub fn get_table_definition(&self, table_name: String) -> Result> { 182 | trace!("get_table_definition was called for: [ {} ]", table_name); 183 | 184 | let uri = try!("https://portal.inshosteddata.com/api/schema/latest".parse()); 185 | let mut req: Request = Request::new(Method::GET, uri); 186 | let date_str = self.get_current_date(); 187 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Failed to turn string into headervalue!")); 188 | req.headers_mut().insert( 189 | "Content-Type", 190 | HeaderValue::from_static("application/json"), 191 | ); 192 | req.headers_mut().insert( 193 | "Authorization", 194 | HeaderValue::from_str(&self.compute_auth_header( 195 | "GET", 196 | "portal.inshosteddata.com", 197 | "application/json", 198 | "", 199 | "/api/schema/latest", 200 | "", 201 | &date_str, 202 | )).expect("Failed to turn string into headervalue!"), 203 | ); 204 | 205 | Ok(try!(self.client.execute(req).and_then(|mut res| { 206 | res.json() 207 | }).map(|res: SchemaDefinition| { 208 | let mut ret = None; 209 | 210 | for table_def in res.schema.values() { 211 | if table_def.table_name.to_lowercase() == table_name { 212 | ret = Some(table_def.clone()); 213 | break; 214 | } 215 | } 216 | 217 | ret 218 | }).map_err(|e| { 219 | io::Error::new(io::ErrorKind::Other, e) 220 | }))) 221 | } 222 | 223 | /// Gets the list of files for a specific dump. 224 | /// 225 | /// * `dump_id` - The Dump ID to grab the list of files for. 226 | pub fn get_files_for_dump(&self, dump_id: String) -> Result { 227 | trace!( 228 | "Get files for dump was called with dump id: [ {} ]", 229 | dump_id 230 | ); 231 | 232 | let path = format!("/api/account/self/file/byDump/{}", dump_id); 233 | let uri = try!(format!("https://portal.inshosteddata.com{}", &path).parse()); 234 | let mut req: Request = Request::new(Method::GET, uri); 235 | let date_str = self.get_current_date(); 236 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Failed to turn string into header value!")); 237 | req.headers_mut().insert( 238 | "Content-Type", 239 | HeaderValue::from_static("application/json"), 240 | ); 241 | req.headers_mut().insert( 242 | "Authorization", 243 | HeaderValue::from_str(&self.compute_auth_header( 244 | "GET", 245 | "portal.inshosteddata.com", 246 | "application/json", 247 | "", 248 | &path, 249 | "", 250 | &date_str, 251 | )).expect("Failed to turn string into headervalue!"), 252 | ); 253 | 254 | Ok(try!(self.client.execute(req).and_then(|mut res| { 255 | res.json() 256 | }).map_err(|e| { 257 | io::Error::new(io::ErrorKind::Other, e) 258 | }))) 259 | } 260 | 261 | /// Download all files for a specific dump. 262 | /// 263 | /// * `dump_id` - The Dump ID of the files to download. 
264 | pub fn download_files_for_dump(&self, dump_id: String) -> Result<()> { 265 | trace!( 266 | "Download files for dump was called with dump id: [ {} ]", 267 | dump_id 268 | ); 269 | let save_location = format!("{}/{}", self.save_location, &dump_id); 270 | try!(fs::create_dir_all(save_location.clone())); 271 | let files_in_dump = try!(self.get_files_for_dump(dump_id.clone())); 272 | 273 | files_in_dump.artifacts_by_table.par_iter().map(move |(_, table_artifact)| { 274 | for file_to_download in table_artifact.files.iter().cloned() { 275 | let finalized_to_download_path = format!("{}/{}", &save_location, &file_to_download.filename); 276 | let cloned_download_path = finalized_to_download_path.clone(); 277 | let path = Path::new(&finalized_to_download_path); 278 | if path.exists() { 279 | debug!( 280 | "{:?} exists, skipping entire artifact", 281 | cloned_download_path 282 | ); 283 | // Assume the entire artifact is downloaded. 284 | continue; 285 | } else { 286 | debug!( 287 | "{:?} does not exist, downloading files", 288 | cloned_download_path 289 | ); 290 | let uri = file_to_download.url.parse().expect("Failed to parse file url form hosted-data!"); 291 | let req = Request::new(Method::GET, uri); 292 | self.client.execute(req).map(move |mut res| { 293 | let download_path = cloned_download_path; 294 | let mut file = File::create(Path::new(&download_path)).expect("Failed to create download file!"); 295 | 296 | res.copy_to(&mut file).expect("Failed to copy to file!") 297 | }).expect("Failed to download table!"); 298 | } 299 | } 300 | }).count(); 301 | 302 | trace!("Done Downloading Files for: {}", dump_id); 303 | 304 | Ok(()) 305 | } 306 | } 307 | 308 | /// Represents a Dump returned from the list dumps endpoint. 309 | #[derive(Clone, Debug, Deserialize)] 310 | pub struct DumpInList { 311 | /// The ID of this particular Dumpm. 312 | #[serde(rename = "dumpId")] 313 | pub dump_id: String, 314 | /// The Sequence number of this dump. 315 | pub sequence: i64, 316 | /// The Account ID this dump is for. 317 | #[serde(rename = "accountId")] 318 | pub account_id: String, 319 | /// The Number of Files this dump is reporting. 320 | #[serde(rename = "numFiles")] 321 | pub num_files: i64, 322 | /// If this dump is finished. 323 | pub finished: bool, 324 | /// When this dump is set to expire. 325 | pub expires: i64, 326 | /// When this dump was last updated. 327 | #[serde(rename = "updatedAt")] 328 | pub updated_at: DateTime, 329 | /// When this dump was created. 330 | #[serde(rename = "createdAt")] 331 | pub created_at: DateTime, 332 | /// The Schema Version this dump is using. 333 | #[serde(rename = "schemaVersion")] 334 | pub schema_version: String, 335 | } 336 | unsafe impl Send for DumpInList {} 337 | 338 | /// The list of files returned from a file in dump response. 339 | #[derive(Clone, Debug, Deserialize)] 340 | pub struct FilesInDumpResponse { 341 | /// The Account ID these files are for. 342 | #[serde(rename = "accountId")] 343 | pub account_id: String, 344 | /// When these files expire. 345 | pub expires: i64, 346 | /// The sequence of the dump these files are apart of. 347 | pub sequence: i64, 348 | /// When these files were last updated. 349 | #[serde(rename = "updatedAt")] 350 | pub updated_at: DateTime, 351 | /// The schema version these files are at. 352 | #[serde(rename = "schemaVersion")] 353 | pub schema_version: String, 354 | /// The number of files that exist. 355 | #[serde(rename = "numFiles")] 356 | pub num_files: i64, 357 | /// When the dump was created these files are apart of. 
358 | #[serde(rename = "createdAt")] 359 | pub created_at: DateTime, 360 | /// The Dump ID these files are related to. 361 | #[serde(rename = "dumpId")] 362 | pub dump_id: String, 363 | /// Whether the dump is finished or not. 364 | pub finished: bool, 365 | /// A list of the "artifacts" or files per table. 366 | #[serde(rename = "artifactsByTable")] 367 | pub artifacts_by_table: BTreeMap, 368 | } 369 | unsafe impl Send for FilesInDumpResponse {} 370 | 371 | /// A list of artifacts per table. 372 | #[derive(Clone, Debug, Deserialize)] 373 | pub struct ArtifactByTable { 374 | /// The table name these artifacts are apart of. 375 | #[serde(rename = "tableName")] 376 | pub table_name: String, 377 | /// Whether or not this is a partial table. 378 | pub partial: bool, 379 | /// A List of files for this table. 380 | pub files: Vec, 381 | } 382 | unsafe impl Send for ArtifactByTable {} 383 | 384 | /// A File object returned in ArtifactsByTable. 385 | #[derive(Clone, Debug, Deserialize)] 386 | pub struct BasicFile { 387 | /// The URL for this file to download from. 388 | pub url: String, 389 | /// The filename of this file. 390 | pub filename: String, 391 | } 392 | unsafe impl Send for BasicFile {} 393 | 394 | /// The Schema Definition returned by Canvas Data. 395 | #[derive(Clone, Debug, Deserialize)] 396 | pub struct SchemaDefinition { 397 | /// The Version of the schema. 398 | pub version: String, 399 | /// The Actual Schema Object itself. 400 | pub schema: BTreeMap, 401 | } 402 | unsafe impl Send for SchemaDefinition {} 403 | 404 | /// A Definition for a Table returned by the Schema API. 405 | #[derive(Clone, Debug, Deserialize)] 406 | pub struct TableDefinition { 407 | /// The DW Type (dimension, or fact). 408 | pub dw_type: String, 409 | /// An optional Description of the table. 410 | pub description: Option, 411 | /// Any hints about how a table, almost always empty, may occasionally provide a sort key, or something of the like. 412 | pub hints: BTreeMap, 413 | /// Whther this table is incremental. 414 | pub incremental: bool, 415 | /// The table name of this table, 416 | #[serde(rename = "tableName")] 417 | pub table_name: String, 418 | /// A List of it's columns. 419 | pub columns: Vec, 420 | } 421 | unsafe impl Send for TableDefinition {} 422 | 423 | /// A Definition for a Column returned by the Schema API. 424 | #[derive(Clone, Debug, Deserialize)] 425 | pub struct ColumnDefinition { 426 | /// The Type this column is. 427 | #[serde(rename = "type")] 428 | pub db_type: String, 429 | /// An optional description of this column. 430 | pub description: Option, 431 | /// The name of this column/ 432 | pub name: String, 433 | /// An optional length to apply to this column. 434 | pub length: Option, 435 | /// Optional information about the dimension. 436 | pub dimension: Option, 437 | } 438 | unsafe impl Send for ColumnDefinition {} 439 | 440 | /// Dimension information returned by the Schema API. 441 | #[derive(Clone, Debug, Deserialize)] 442 | pub struct DimensionDefinition { 443 | /// The name of this dimension. 444 | pub name: String, 445 | /// The ID of this dimension. 446 | pub id: String, 447 | /// An optional role to attach to this dimension. 448 | pub role: Option, 449 | } 450 | unsafe impl Send for DimensionDefinition {} 451 | -------------------------------------------------------------------------------- /src/db_client.rs: -------------------------------------------------------------------------------- 1 | //! Provides the Database Client for the CDL Runner. 2 | //! 
This will control all the connections/inserts/updates/etc. 3 | 4 | use errors::*; 5 | use r2d2::{ManageConnection, Pool}; 6 | use std::clone::Clone; 7 | use std::collections::BTreeMap; 8 | use settings::{DatabaseType, Settings}; 9 | use type_converter::get_cast_as; 10 | 11 | #[cfg(feature = "postgres_compat")] 12 | use r2d2_postgres::{TlsMode, PostgresConnectionManager}; 13 | 14 | #[cfg(feature = "mysql_compat")] 15 | use mysql_pool::{CreateManager, MysqlConnectionManager}; 16 | 17 | /// The Database Client Structure. 18 | pub struct DatabaseClient { 19 | /// The Type of the Database. 20 | pub db_type: DatabaseType, 21 | /// The Underlying Connection Pool. 22 | underlying_pool: Pool, 23 | } 24 | 25 | impl Clone for DatabaseClient { 26 | fn clone(&self) -> DatabaseClient { 27 | DatabaseClient { 28 | db_type: self.db_type.clone(), 29 | underlying_pool: self.underlying_pool.clone(), 30 | } 31 | } 32 | } 33 | 34 | /// Something the importer can use to talk to the database. 35 | pub trait ImportDatabaseAdapter { 36 | /// Gets the Database Type. 37 | fn get_db_type(&self) -> DatabaseType; 38 | 39 | /// Drops a Table in the Database. 40 | /// 41 | /// * `table_name` - The Table name to Drop. 42 | fn drop_table(&self, table_name: String) -> Result<()>; 43 | 44 | /// Creates a Table in the Database. 45 | /// 46 | /// * `table_name` - The Table name to Create. 47 | /// * `columns` - The column definition to create . 48 | fn create_table(&self, table_name: String, columns: BTreeMap) -> Result<()>; 49 | 50 | /// Drops a Record in the Database. 51 | /// 52 | /// * `table_name` - The Table Name to drop from. 53 | /// * `column_types` - The types of columns 54 | /// * `column_name` - The column name to use in the WHERE clause. 55 | /// * `value` - The columnv value to use in the WHERE clause. 56 | fn drop_record( 57 | &self, 58 | table_name: String, 59 | column_types: BTreeMap, 60 | column_name: String, 61 | value: String, 62 | ) -> Result<()>; 63 | 64 | /// Inserts a Record into the Database. 65 | /// 66 | /// * `table_name` - The table name to insert the record into. 67 | /// * `columns` - The columns to insert into the table . 68 | /// * `column_types` - The types of columns to use. 69 | fn insert_record( 70 | &self, 71 | table_name: String, 72 | column_types: BTreeMap, 73 | columns: BTreeMap>, 74 | ) -> Result<()>; 75 | } 76 | 77 | #[cfg(feature = "postgres_compat")] 78 | impl DatabaseClient { 79 | /// Creates a New Database Client for Postgres. 80 | /// 81 | /// `settings` - The underlying settings object to configure ourselves with. 82 | pub fn new(settings: &Settings) -> Result> { 83 | let manager = PostgresConnectionManager::new(settings.get_database_url(), TlsMode::None); 84 | if manager.is_err() { 85 | return Err(ErrorKind::PostgresErr.into()); 86 | } 87 | let manager = manager.unwrap(); 88 | let pool = Pool::new(manager).expect( 89 | "Failed to turn connection into pool. This should never happen", 90 | ); 91 | Ok(DatabaseClient:: { 92 | db_type: DatabaseType::Psql, 93 | underlying_pool: pool, 94 | }) 95 | } 96 | } 97 | 98 | #[cfg(feature = "mysql_compat")] 99 | impl DatabaseClient { 100 | /// Creates a New Database Client for Mysql. 101 | /// 102 | /// `settings` - The underlying settings object to configure ourselves with. 
103 | pub fn new(settings: &Settings) -> Result> { 104 | let manager = MysqlConnectionManager::new(settings.get_database_url().as_str()); 105 | if manager.is_err() { 106 | return Err(ErrorKind::MysqlErr.into()); 107 | } 108 | let manager = manager.unwrap(); 109 | let pool = Pool::new(manager).expect( 110 | "Failed to turn a connection into pool. This should never happen", 111 | ); 112 | Ok(DatabaseClient:: { 113 | db_type: DatabaseType::Mysql, 114 | underlying_pool: pool, 115 | }) 116 | } 117 | } 118 | 119 | #[cfg(feature = "postgres_compat")] 120 | impl ImportDatabaseAdapter for DatabaseClient { 121 | fn get_db_type(&self) -> DatabaseType { 122 | trace!("get_db_type was called"); 123 | self.db_type.clone() 124 | } 125 | 126 | fn drop_table(&self, table_name: String) -> Result<()> { 127 | trace!("drop_table was called for: [ {} ]", table_name); 128 | // Get a aconnection from the pool. 129 | let connection = self.underlying_pool.get(); 130 | if connection.is_err() { 131 | return Err(ErrorKind::PostgresErr.into()); 132 | } 133 | let connection = connection.unwrap(); 134 | 135 | // Execute drop table statement. 136 | let result = connection.execute(&format!("DROP TABLE IF EXISTS {}", table_name), &[]); 137 | if result.is_err() { 138 | error!("drop_table err"); 139 | error!("{:?}", result.err().unwrap()); 140 | return Err(ErrorKind::PostgresErr.into()); 141 | } else { 142 | trace!("drop_table was successful"); 143 | return Ok(()); 144 | } 145 | } 146 | 147 | fn create_table(&self, table_name: String, columns: BTreeMap) -> Result<()> { 148 | trace!("create_table was called for: [ {} ]", table_name); 149 | // Get a Connection from the underlying DB Connection Pool. 150 | let connection = self.underlying_pool.get(); 151 | if connection.is_err() { 152 | return Err(ErrorKind::PostgresErr.into()); 153 | } 154 | let connection = connection.unwrap(); 155 | 156 | // Create the create table statement. `default` is reseverd word, so replace with 157 | // `_default`. 158 | let mut creation_string = format!("CREATE TABLE IF NOT EXISTS {} (\n", table_name); 159 | for (key, val) in columns.into_iter() { 160 | creation_string += &format!("{} {},\n", key.replace("default", "_default"), val); 161 | } 162 | // Cut off the newline + trailing comma. 163 | let len = creation_string.len(); 164 | creation_string.truncate(len - 2); 165 | // Append final parentheses. 166 | creation_string += ")"; 167 | trace!( 168 | "Using the following creation string: \n {}", 169 | creation_string 170 | ); 171 | 172 | // Execute Create Table Statement. 173 | let result = connection.execute(&creation_string, &[]); 174 | if result.is_err() { 175 | error!("create_table err"); 176 | error!("{:?}", result.err().unwrap()); 177 | return Err(ErrorKind::PostgresErr.into()); 178 | } else { 179 | trace!("create_table was successful!"); 180 | return Ok(()); 181 | } 182 | } 183 | 184 | fn drop_record( 185 | &self, 186 | table_name: String, 187 | column_types: BTreeMap, 188 | column_name: String, 189 | value: String, 190 | ) -> Result<()> { 191 | trace!( 192 | "Drop record was called for table: {} on column: {} with value: {}", 193 | table_name, 194 | column_name, 195 | value 196 | ); 197 | // Get a Connection from the underlying pool. 198 | let connection = self.underlying_pool.get(); 199 | if connection.is_err() { 200 | return Err(ErrorKind::PostgresErr.into()); 201 | } 202 | let connection = connection.unwrap(); 203 | 204 | // Prepare a statemtn for deleting from a table. 
205 | let mut prepared = 206 | format!( 207 | "DELETE FROM {} WHERE {} = ", 208 | table_name, 209 | column_name.clone(), 210 | ); 211 | let the_type = column_types.get(&column_name).unwrap(); 212 | 213 | // Make sure the column gets inserted as the right type to prevent db errors. 214 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 215 | if cast_as == "" { 216 | prepared += &format!("{:?}", value.replace("'", "").replace("\"", "")).replace("\"", "'"); 217 | } else { 218 | prepared += &format!( 219 | "{:?}::{}", 220 | value.replace("'", "").replace("\"", ""), 221 | cast_as 222 | ).replace("\"", "'"); 223 | } 224 | 225 | // Execute the preapred delete statement. 226 | let statement = connection.execute(&prepared, &[]); 227 | if statement.is_err() { 228 | error!("drop_record err"); 229 | error!("{:?}", statement.err().unwrap()); 230 | return Err(ErrorKind::PostgresErr.into()); 231 | } else { 232 | return Ok(()); 233 | } 234 | } 235 | 236 | fn insert_record( 237 | &self, 238 | table_name: String, 239 | column_types: BTreeMap, 240 | columns: BTreeMap>, 241 | ) -> Result<()> { 242 | trace!("insert_record was called for table: {}", table_name); 243 | // Get a connection from the underlying pool. 244 | let connection = self.underlying_pool.get(); 245 | if connection.is_err() { 246 | return Err(ErrorKind::PostgresErr.into()); 247 | } 248 | let connection = connection.unwrap(); 249 | 250 | // Create the insert into statement. 251 | let mut insert_string = format!("INSERT INTO {} (", table_name); 252 | let mut types = BTreeMap::new(); 253 | 254 | // We need to know all the types of the keys for the INSERT INTO () VALUES () 255 | for (pos, key) in columns.keys().enumerate() { 256 | insert_string += &format!("{},", key.replace("default", "_default")); 257 | types.insert(pos, column_types.get(key).unwrap().to_owned()); 258 | } 259 | let mut len = insert_string.len(); 260 | // Remove Trailing Comma. 261 | insert_string.truncate(len - 1); 262 | 263 | // Loop over actual values. 264 | insert_string += ") VALUES ("; 265 | for (pos, val) in columns.values().enumerate() { 266 | // Handle Nulls 267 | if val.is_none() { 268 | insert_string += "NULL,"; 269 | } else { 270 | let the_type = types.get(&pos).unwrap(); 271 | // Cast the value as the right type. 272 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 273 | if cast_as == "" { 274 | insert_string += &format!( 275 | "{:?},", 276 | val.clone().unwrap().replace("'", "").replace("\"", "") 277 | ).replace("\"", "'"); 278 | } else { 279 | insert_string += &format!( 280 | "{:?}::{},", 281 | val.clone().unwrap().replace("'", "").replace("\"", ""), 282 | cast_as 283 | ).replace("\"", "'"); 284 | } 285 | } 286 | } 287 | len = insert_string.len(); 288 | 289 | // Remove Trailing Comma. 290 | insert_string.truncate(len - 1); 291 | insert_string += ")"; 292 | debug!("Insert_record string looks like: \n {}", insert_string); 293 | 294 | // Execute. 
295 | let statement = connection.execute(&insert_string, &[]); 296 | if statement.is_err() { 297 | error!("insert error"); 298 | error!("{:?}", statement.err().unwrap()); 299 | return Err(ErrorKind::PostgresErr.into()); 300 | } else { 301 | return Ok(()); 302 | } 303 | } 304 | } 305 | 306 | 307 | #[cfg(feature = "mysql_compat")] 308 | impl ImportDatabaseAdapter for DatabaseClient { 309 | fn get_db_type(&self) -> DatabaseType { 310 | trace!("get_db_type was called"); 311 | self.db_type.clone() 312 | } 313 | 314 | fn drop_table(&self, table_name: String) -> Result<()> { 315 | trace!("drop_table was called for: [ {} ]", table_name); 316 | 317 | // Get connection from the underlying pool. 318 | let connection = self.underlying_pool.get(); 319 | if connection.is_err() { 320 | return Err(ErrorKind::MysqlErr.into()); 321 | } 322 | let mut connection = connection.unwrap(); 323 | 324 | // Create DropTable statement. 325 | let result = connection.query(&format!("DROP TABLE IF EXISTS {}", table_name)); 326 | if result.is_err() { 327 | error!("drop_table err"); 328 | error!("{:?}", result.err().unwrap()); 329 | return Err(ErrorKind::MysqlErr.into()); 330 | } else { 331 | trace!("drop_table was successful"); 332 | return Ok(()); 333 | } 334 | } 335 | 336 | fn create_table(&self, table_name: String, columns: BTreeMap) -> Result<()> { 337 | trace!("create_table was called for: [ {} ]", table_name); 338 | // Get connection from the underlying pool. 339 | let connection = self.underlying_pool.get(); 340 | if connection.is_err() { 341 | return Err(ErrorKind::MysqlErr.into()); 342 | } 343 | let mut connection = connection.unwrap(); 344 | 345 | // Form Creation String. `default`, and `generated` are reserved words. 346 | let mut creation_string = format!("CREATE TABLE IF NOT EXISTS {} (\n", table_name); 347 | for (key, val) in columns.into_iter() { 348 | creation_string += &format!( 349 | "{} {},\n", 350 | key.replace("default", "_default").replace( 351 | "generated", 352 | "_generated", 353 | ), 354 | val 355 | ); 356 | } 357 | let len = creation_string.len(); 358 | // Remove Trailing newline, and comma. 359 | creation_string.truncate(len - 2); 360 | // Ensure Character set is utf8mb4. 361 | creation_string += ") CHARACTER SET utf8mb4"; 362 | trace!( 363 | "Using the following creation string: \n {}", 364 | creation_string 365 | ); 366 | 367 | // Execute. 368 | let result = connection.query(&creation_string); 369 | if result.is_err() { 370 | error!("create_table err"); 371 | error!("{:?}", result.err().unwrap()); 372 | return Err(ErrorKind::MysqlErr.into()); 373 | } else { 374 | trace!("create_table was successful!"); 375 | return Ok(()); 376 | } 377 | } 378 | 379 | fn drop_record( 380 | &self, 381 | table_name: String, 382 | column_types: BTreeMap, 383 | column_name: String, 384 | value: String, 385 | ) -> Result<()> { 386 | trace!( 387 | "Drop record was called for table: {} on column: {} with value: {}", 388 | table_name, 389 | column_name, 390 | value 391 | ); 392 | // Grab a Connection from the pool. 393 | let connection = self.underlying_pool.get(); 394 | if connection.is_err() { 395 | return Err(ErrorKind::MysqlErr.into()); 396 | } 397 | let mut connection = connection.unwrap(); 398 | 399 | // Start Preparing a Delete from statement. 400 | let mut prepared = 401 | format!( 402 | "DELETE FROM {} WHERE {} = ", 403 | table_name, 404 | column_name.clone(), 405 | ); 406 | let the_type = column_types.get(&column_name).unwrap(); 407 | 408 | // Cast the type correctly. 
409 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 410 | if cast_as == "" { 411 | prepared += &format!("{:?}", value.replace("'", "").replace("\"", "")).replace("\"", "'"); 412 | } else { 413 | prepared += &format!( 414 | "CAST({:?} as {})", 415 | value.replace("'", "").replace("\"", ""), 416 | cast_as 417 | ).replace("\"", "'"); 418 | } 419 | 420 | // Execute. 421 | let statement = connection.query(&prepared); 422 | if statement.is_err() { 423 | error!("drop_record err"); 424 | error!("{:?}", statement.err().unwrap()); 425 | return Err(ErrorKind::MysqlErr.into()); 426 | } else { 427 | return Ok(()); 428 | } 429 | } 430 | 431 | fn insert_record( 432 | &self, 433 | table_name: String, 434 | column_types: BTreeMap, 435 | columns: BTreeMap>, 436 | ) -> Result<()> { 437 | trace!("insert_record was called for table: {}", table_name); 438 | // Get connection from the underlying pool. 439 | let connection = self.underlying_pool.get(); 440 | if connection.is_err() { 441 | return Err(ErrorKind::PostgresErr.into()); 442 | } 443 | let mut connection = connection.unwrap(); 444 | 445 | // Start Preparing insert into statements. 446 | let mut insert_string = format!("INSERT INTO {} (", table_name); 447 | let mut types = BTreeMap::new(); 448 | 449 | // We need the types for INSERT INTO () VALUES (). Get Those. 450 | for (pos, key) in columns.keys().enumerate() { 451 | insert_string += &format!( 452 | "{},", 453 | key.replace("default", "_default").replace( 454 | "generated", 455 | "_generated", 456 | ) 457 | ); 458 | types.insert(pos, column_types.get(key).unwrap().to_owned()); 459 | } 460 | let mut len = insert_string.len(); 461 | // Remove trailing comma. 462 | insert_string.truncate(len - 1); 463 | 464 | // Start Inserting Values. 465 | insert_string += ") VALUES ("; 466 | for (pos, val) in columns.values().enumerate() { 467 | if val.is_none() { 468 | // Handle NULLs. 469 | insert_string += "NULL,"; 470 | } else { 471 | let the_type = types.get(&pos).unwrap(); 472 | // Cast the type correctly. 473 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 474 | if cast_as == "" { 475 | insert_string += &format!( 476 | "{:?},", 477 | val.clone().unwrap().replace("'", "").replace("\"", "") 478 | ).replace("\"", "'"); 479 | } else { 480 | insert_string += &format!( 481 | "CAST({:?} AS {}),", 482 | val.clone().unwrap().replace("'", "").replace("\"", ""), 483 | cast_as 484 | ).replace("\"", "'"); 485 | } 486 | } 487 | } 488 | len = insert_string.len(); 489 | // Remove trailing commas. 490 | insert_string.truncate(len - 1); 491 | insert_string += ")"; 492 | debug!("Insert_record string looks like: \n {}", insert_string); 493 | 494 | // Execute. 495 | let statement = connection.query(&insert_string); 496 | if statement.is_err() { 497 | error!("insert error"); 498 | error!("{:?}", statement.err().unwrap()); 499 | return Err(ErrorKind::MysqlErr.into()); 500 | } else { 501 | return Ok(()); 502 | } 503 | } 504 | } 505 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | //! Provides all errors for the cdl-runner crate. 2 | 3 | use glob; 4 | use reqwest; 5 | use std::io; 6 | 7 | error_chain! 
{ 8 | 9 | errors { 10 | InvalidTypeToConvert(the_type: String) { 11 | description("Cannot convert type to a Database Type!") 12 | display("Invalid Type: [ {} ] to convert to DB", the_type) 13 | } 14 | 15 | PostgresErr { 16 | description("Underlying postgres error!") 17 | display("Underlying postgres error!") 18 | } 19 | 20 | MysqlErr { 21 | description("Underlying Mysql error!") 22 | display("Underlying Mysql error!") 23 | } 24 | 25 | ImportErr { 26 | description("Underlying import errror!") 27 | display("Underlying import error!") 28 | } 29 | } 30 | 31 | foreign_links { 32 | Globerror(glob::PatternError); 33 | HttpError(reqwest::Error); 34 | HttpUrlError(reqwest::UrlError); 35 | Ioerror(io::Error); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/importer.rs: -------------------------------------------------------------------------------- 1 | //! Actually imports the data into a database. 2 | 3 | use api_client::{CanvasDataApiClient, TableDefinition}; 4 | use db_client::ImportDatabaseAdapter; 5 | use errors::*; 6 | use flate2::read::GzDecoder; 7 | use glob::glob; 8 | use rayon::prelude::*; 9 | use std::collections::BTreeMap; 10 | use std::fs::File; 11 | use std::io::prelude::*; 12 | use std::sync::atomic::{AtomicBool, Ordering}; 13 | use type_converter::convert_type_for_db; 14 | 15 | lazy_static! { 16 | /// A list of tables that may not have constant IDs, or 17 | /// single field PKs, and as such need to be dropped/recreated 18 | /// on each import. 19 | static ref VOLATILE_TABLES: Vec = vec![ 20 | "module_completion_requirement_fact".to_owned(), 21 | "module_fact".to_owned(), 22 | "module_item_fact".to_owned(), 23 | "module_prerequisite_fact".to_owned(), 24 | "module_progression_completion_requirement_fact".to_owned(), 25 | "module_progression_fact".to_owned(), 26 | "quiz_fact".to_owned(), 27 | "quiz_question_answer_fact".to_owned(), 28 | "quiz_question_fact".to_owned(), 29 | "quiz_question_group_fact".to_owned(), 30 | "quiz_submission_fact".to_owned(), 31 | "quiz_submission_historical_fact".to_owned(), 32 | "module_completion_requirement_dim".to_owned(), 33 | "module_dim".to_owned(), 34 | "module_item_dim".to_owned(), 35 | "module_prerequisite_dim".to_owned(), 36 | "module_progression_completion_requirement_dim".to_owned(), 37 | "module_progression_dim".to_owned(), 38 | "quiz_dim".to_owned(), 39 | "quiz_question_answer_dim".to_owned(), 40 | "quiz_question_dim".to_owned(), 41 | "quiz_question_group_dim".to_owned(), 42 | "quiz_submission_dim".to_owned(), 43 | "quiz_submission_historical_dim".to_owned(), 44 | "submission_comment_participant_dim".to_owned(), 45 | "requests".to_owned(), 46 | "assignment_override_user_rollup_fact".to_owned(), 47 | "enrollment_rollup_dim".to_owned(), 48 | ]; 49 | } 50 | 51 | /// The Root Importer Object. 52 | pub struct Importer { 53 | /// The Canvas Data API Client. 54 | api_client: CanvasDataApiClient, 55 | /// The Dump ID to process. 56 | dump_id: String, 57 | /// The location of where to save stuff. 58 | save_location: String, 59 | /// The Importing Database Adapter. 60 | db_adapter: T, 61 | } 62 | unsafe impl Send for Importer {} 63 | unsafe impl Sync for Importer {} 64 | 65 | /// A representation of the filenaame. 66 | struct FileNameSplit { 67 | /// The Table name of this file. 68 | pub table_name: String, 69 | /// The part of the internal shard for this file. 70 | pub sharded_part: String, 71 | /// The part of the internal hash for this file. 
72 | pub hash_part: String, 73 | /// The extension for this file. 74 | pub extension: String, 75 | } 76 | 77 | impl FileNameSplit { 78 | /// Split a file name that has been downloaded up into pieces to match on table names, and such easier. 79 | /// 80 | /// * `split_from` - The filename to go ahead, and split. 81 | pub fn new(split_from: String) -> Option { 82 | if split_from.find("-").is_none() { 83 | return None; 84 | } 85 | let as_split: Vec<_> = split_from.split("-").collect(); 86 | if as_split.len() != 3 { 87 | return None; 88 | } 89 | let to_split_part = as_split[2].to_owned(); 90 | let part_with_file_extension: Vec<_> = to_split_part.split(".").collect(); 91 | let hash_part_frd = part_with_file_extension[0].to_owned(); 92 | let extension_frd = part_with_file_extension[1].to_owned(); 93 | 94 | Some(FileNameSplit { 95 | table_name: as_split[0].to_owned(), 96 | sharded_part: as_split[1].to_owned(), 97 | hash_part: hash_part_frd, 98 | extension: extension_frd, 99 | }) 100 | } 101 | } 102 | 103 | impl Importer { 104 | /// Creates a new Importer. 105 | /// 106 | /// * `api_client` - The API Client to use. 107 | /// * `db_adapter` - The Database Adapter to Import Into. 108 | /// * `dump_id` - The Dump ID to import. 109 | /// * `save_location` - The Save location. 110 | pub fn new(api_client: CanvasDataApiClient, db_adapter: T, dump_id: String, save_location: String) -> Self { 111 | Importer { 112 | api_client: api_client, 113 | dump_id: dump_id, 114 | save_location: save_location, 115 | db_adapter: db_adapter, 116 | } 117 | } 118 | 119 | /// Gets the table info from the definition. 120 | /// 121 | /// Gets the table info we need for processing from the definition. Specifically returns the 122 | /// (, ) items. 123 | /// 124 | /// * `table_def` - The Table Definition. 125 | fn get_table_info_from_def(&self, table_def: TableDefinition) -> (Vec, BTreeMap) { 126 | let mut finalized_vec = Vec::new(); 127 | let mut finalized_map = BTreeMap::new(); 128 | 129 | for column in table_def.columns.iter() { 130 | finalized_vec.push(column.name.clone()); 131 | finalized_map.insert( 132 | column.name.clone(), 133 | convert_type_for_db(column.db_type.clone(), self.db_adapter.get_db_type()) 134 | .expect("Failed to Convert Type for DB!"), 135 | ); 136 | } 137 | 138 | (finalized_vec, finalized_map) 139 | } 140 | 141 | /// Gets an "ID" Like column from a list of columns, and a table name. 142 | /// 143 | /// Used to automatically "guess" a primary key for a table since our methods of naming in the schema 144 | /// are mostly deterministic. 145 | /// 146 | /// * `table_name` - the name of the table these columns provide for. 147 | /// * `columns` - A Reference to the list of columns. 148 | fn get_id_like_column_from_columns( 149 | &self, 150 | table_name: String, 151 | columns: &BTreeMap>, 152 | ) -> Option { 153 | debug!("Finding ID Like column for: {}", table_name); 154 | // Check if we have an ID Column. If so, that's what we should use. 155 | if columns.contains_key("id") { 156 | debug!("Has ID Column!"); 157 | return Some("id".to_owned()); 158 | } else { 159 | debug!("Looking up name!"); 160 | // Other tables are labeled like assignment_fact, and have assignment_id. Handle those. 
161 | let find_table_name_potential = table_name.rfind("_"); 162 | if find_table_name_potential.is_some() { 163 | let (the_final_table_name, _) = table_name 164 | .split_at(find_table_name_potential.unwrap()) 165 | .to_owned(); 166 | debug!("Looking up: {}_id", the_final_table_name); 167 | if columns.contains_key(&format!("{}_id", the_final_table_name.clone())) { 168 | debug!("Found per table ID!"); 169 | return Some(format!("{}_id", the_final_table_name)); 170 | } 171 | let find_final_table_name_potential = the_final_table_name.rfind("_"); 172 | if find_final_table_name_potential.is_some() { 173 | let (the_final_table_name_frd, _) = the_final_table_name 174 | .split_at(find_final_table_name_potential.unwrap()) 175 | .to_owned(); 176 | debug!("Looking up: {}_id", the_final_table_name_frd); 177 | if columns.contains_key(&format!("{}_id", the_final_table_name_frd.clone())) { 178 | debug!("Found per table ID!"); 179 | return Some(format!("{}_id", the_final_table_name_frd)); 180 | } 181 | } 182 | } 183 | } 184 | debug!("No ID Found!"); 185 | None 186 | } 187 | 188 | /// Processes a Dump. Aka Imports it. 189 | pub fn process(&self, is_all_volatile: bool) -> Result<()> { 190 | trace!("Process Called for dump: {}", self.dump_id); 191 | 192 | // Download the Files for this dump. 193 | try!(self.api_client.download_files_for_dump( 194 | self.dump_id.clone(), 195 | )); 196 | 197 | // Glob to find downloaded files. 198 | let saved_location_glob = format!("{}/{}/*.gz", &self.save_location, &self.dump_id); 199 | let mut collected: Vec<_> = try!(glob(&saved_location_glob)).collect(); 200 | 201 | // Keep a seperate have failed for our iterator, and the tables we've already dropped. 202 | // Don't want to drop a table multiple times. 203 | let has_failed = AtomicBool::from(false); 204 | 205 | // Drop tables first if first. 206 | collected.iter_mut().map(|entry| { 207 | // If we've already failed, skip. Don't try to keep importing. 208 | if has_failed.load(Ordering::Relaxed) { 209 | trace!("Skipping Entry: {:?} , due to failing", entry); 210 | return; 211 | } 212 | 213 | if let &mut Ok(ref mut path) = entry { 214 | let path_frd = path; 215 | let file_name = path_frd.file_name().unwrap().to_str().unwrap().to_owned(); 216 | let file_name_split = FileNameSplit::new(file_name).unwrap(); 217 | 218 | if VOLATILE_TABLES.contains(&file_name_split.table_name) || is_all_volatile { 219 | let drop_res = self.db_adapter.drop_table(file_name_split.table_name); 220 | if drop_res.is_err() { 221 | error!("process -> is_volatile -> drop_res -> is_err"); 222 | error!("{:?}", drop_res.err().unwrap()); 223 | has_failed.store(true, Ordering::Relaxed); 224 | return; 225 | } 226 | } 227 | } 228 | }).count(); 229 | 230 | let _: Vec<_> = collected 231 | .par_iter_mut() 232 | .map(|entry| { 233 | // If we've already failed, skip. Don't try to keep importing. 234 | if has_failed.load(Ordering::Relaxed) { 235 | trace!("Skipping Entry: {:?} , due to failing", entry); 236 | return; 237 | } 238 | 239 | // If we have a path of a downloaded file. 240 | if let &mut Ok(ref mut path) = entry { 241 | // Get the filename of the downloaded file, and parse it since filenames are determinsitic. 242 | trace!("Got Path"); 243 | let path_frd = path.clone(); 244 | let file_name = path_frd.file_name().unwrap().to_str().unwrap().to_owned(); 245 | let file_name_split = FileNameSplit::new(file_name).unwrap(); 246 | trace!("Post Split!"); 247 | 248 | // Get the table definition for the downloaded table we're looking at. 
249 | let table_def = self.api_client.get_table_definition( 250 | file_name_split.table_name.clone(), 251 | ); 252 | if table_def.is_err() { 253 | error!("process -> table_def -> is_err"); 254 | error!("{:?}", table_def.err().unwrap()); 255 | has_failed.store(true, Ordering::Relaxed); 256 | return; 257 | } 258 | let table_def = table_def.unwrap().unwrap(); 259 | let is_volatile_table = VOLATILE_TABLES.contains(&file_name_split.table_name) || is_all_volatile; 260 | 261 | // Get the columns for our table. 262 | let (column_names, column_defs) = self.get_table_info_from_def(table_def); 263 | trace!("Post Table Def!"); 264 | 265 | // Open up the file for readaing. 266 | let file = File::open(path_frd); 267 | if file.is_err() { 268 | error!("process -> file -> is_err"); 269 | error!("{:?}", file.err().unwrap()); 270 | has_failed.store(true, Ordering::Relaxed); 271 | return; 272 | } 273 | let mut file = file.unwrap(); 274 | trace!("Post File Open"); 275 | 276 | // Read the entire file into a buffer. 277 | // TODO: Maybe oneday switch to a buffered reader? 278 | let mut buffer = Vec::new(); 279 | let res = file.read_to_end(&mut buffer); 280 | if res.is_err() { 281 | error!("process -> res -> is_err"); 282 | error!("{:?}", res.err().unwrap()); 283 | has_failed.store(true, Ordering::Relaxed); 284 | return; 285 | } 286 | trace!("Post Reader"); 287 | 288 | // Uncompress the file. 289 | let mut decoder = GzDecoder::new(buffer.as_slice()); 290 | trace!("Post Decoder Init"); 291 | let mut finalized_string = String::new(); 292 | let decode_res = decoder.read_to_string(&mut finalized_string); 293 | if decode_res.is_err() { 294 | error!("prcoess -> decode_res -> is_err"); 295 | error!("{:?}", decode_res.err().unwrap()); 296 | has_failed.store(true, Ordering::Relaxed); 297 | return; 298 | } 299 | trace!("Post Decode to STR"); 300 | debug!("Decoded String: \n {:?}", finalized_string); 301 | 302 | // Create the table if it doesn't exist. 303 | let create_res = self.db_adapter.create_table( 304 | file_name_split.table_name.clone(), 305 | column_defs.clone(), 306 | ); 307 | if create_res.is_err() { 308 | error!("prcoess -> create_res -> is_err"); 309 | error!("{:?}", create_res.err().unwrap()); 310 | has_failed.store(true, Ordering::Relaxed); 311 | return; 312 | } 313 | trace!("Post create table"); 314 | 315 | // For each line in this file. 316 | for line in finalized_string.lines() { 317 | trace!("Processing line: [ {:?} ]", line); 318 | let mut columns = BTreeMap::new(); 319 | // Split by tabs, gather all columns. 320 | let split_up_tsv_line: Vec<_> = line.split("\t").collect(); 321 | for (pos, name) in column_names.iter().enumerate() { 322 | let mut split_up_line = Some(split_up_tsv_line[pos].to_owned()); 323 | if split_up_line.clone().unwrap().as_str() == "\\N" { 324 | split_up_line = None 325 | } 326 | columns.insert(name.to_owned(), split_up_line); 327 | } 328 | 329 | trace!("Inserting Columns: [ {:?} ]", columns); 330 | 331 | if is_volatile_table { 332 | // If we're volatile don't check if it exists already, just insert. 
333 | trace!("Is volatile table, performing insert"); 334 | let ins_res = self.db_adapter.insert_record( 335 | file_name_split.table_name.clone(), 336 | column_defs.clone(), 337 | columns, 338 | ); 339 | if ins_res.is_err() { 340 | error!("process -> for line in finalized_string -> is_volatile -> ins_res -> is_err"); 341 | error!("{:?}", ins_res.err().unwrap()); 342 | has_failed.store(true, Ordering::Relaxed); 343 | return; 344 | } 345 | } else { 346 | // Perform a diff if we're not volatile. 347 | trace!("Is not volatile performing diff."); 348 | 349 | // Get the ID to diff by. 350 | let id_like_column = self.get_id_like_column_from_columns(file_name_split.table_name.clone(), &columns); 351 | if id_like_column.is_none() { 352 | error!("Failed to find table id like column!"); 353 | has_failed.store(true, Ordering::Relaxed); 354 | return; 355 | } 356 | let id_like_column = id_like_column.unwrap(); 357 | let id_like_value = columns 358 | .get(&id_like_column) 359 | .unwrap() 360 | .clone() 361 | .unwrap() 362 | .to_owned(); 363 | trace!("Performing deletion request for id like column"); 364 | // Send delete request for that ID. on first time seeing this will be no op due to WHERE Clause. 365 | let del_res = self.db_adapter.drop_record( 366 | file_name_split.table_name.clone(), 367 | column_defs.clone(), 368 | id_like_column, 369 | id_like_value, 370 | ); 371 | if del_res.is_err() { 372 | error!("Failed to drop column!"); 373 | has_failed.store(true, Ordering::Relaxed); 374 | return; 375 | } 376 | 377 | // Insert the column to overwrite. 378 | trace!("Performing insert"); 379 | let ins_res = self.db_adapter.insert_record( 380 | file_name_split.table_name.clone(), 381 | column_defs.clone(), 382 | columns, 383 | ); 384 | if ins_res.is_err() { 385 | error!("process -> for line in finalized_string -> !is_volatile -> ins_res -> is_err"); 386 | error!("{:?}", ins_res.err().unwrap()); 387 | has_failed.store(true, Ordering::Relaxed); 388 | return; 389 | } 390 | } 391 | trace!("Imported Line."); 392 | } 393 | } 394 | }) 395 | .collect(); 396 | 397 | debug!("Has Failed: {}", has_failed.load(Ordering::Relaxed)); 398 | 399 | if !has_failed.load(Ordering::Relaxed) { 400 | trace!("Hasn't Failed"); 401 | Ok(()) 402 | } else { 403 | trace!("Has Failed!"); 404 | Err(ErrorKind::ImportErr.into()) 405 | } 406 | } 407 | } 408 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate base64; 2 | extern crate chrono; 3 | extern crate config; 4 | #[macro_use] 5 | extern crate error_chain; 6 | extern crate env_logger; 7 | extern crate flate2; 8 | extern crate futures; 9 | extern crate glob; 10 | #[macro_use] 11 | extern crate lazy_static; 12 | #[macro_use] 13 | extern crate log; 14 | extern crate r2d2; 15 | extern crate rayon; 16 | extern crate regex; 17 | extern crate reqwest; 18 | extern crate ring; 19 | extern crate rocksdb; 20 | extern crate serde; 21 | #[macro_use] 22 | extern crate serde_derive; 23 | extern crate serde_json; 24 | extern crate tokio_core; 25 | 26 | #[cfg(feature = "postgres_compat")] 27 | extern crate postgres; 28 | #[cfg(feature = "postgres_compat")] 29 | extern crate r2d2_postgres; 30 | 31 | #[cfg(feature = "mysql_compat")] 32 | extern crate mysql; 33 | 34 | pub mod api_client; 35 | pub mod db_client; 36 | pub mod errors; 37 | pub mod importer; 38 | pub mod settings; 39 | pub mod type_converter; 40 | 41 | #[cfg(feature = "mysql_compat")] 42 | pub mod 
43 | 
44 | use db_client::DatabaseClient;
45 | use rocksdb::DB;
46 | use settings::DatabaseType;
47 | 
48 | #[cfg(feature = "postgres_compat")]
49 | use r2d2_postgres::PostgresConnectionManager;
50 | 
51 | #[cfg(feature = "mysql_compat")]
52 | use mysql_pool::MysqlConnectionManager;
53 | 
54 | /// Entry Point to the application.
55 | fn main() {
56 |   env_logger::init();
57 | 
58 |   // Initialize Settings.
59 |   let settings = settings::Settings::new();
60 |   let has_errord = false;
61 |   info!("Setting up API Client...");
62 | 
63 |   // Get the dump listing, and set up some variables for iteration.
64 |   let api_client = api_client::CanvasDataApiClient::new(&settings);
65 |   let mut dumps = api_client.get_dumps().expect("Failed to get List of Dumps");
66 |   dumps.sort_by(|dump_one, dump_two| {
67 |     dump_one.created_at.cmp(&dump_two.created_at)
68 |   });
69 |   let dumps_len = dumps.len();
70 |   let only_final_dump = settings.get_should_only_load_final();
71 |   let mut current_dumps_pos = 0;
72 |   debug!("{:?}", dumps);
73 | 
74 |   // Connect to the local KV Store.
75 |   info!("Connecting to RocksDB Store....");
76 |   let whiskey = DB::open_default(settings.get_rocksdb_location()).expect("Failed to open RocksDB");
77 | 
78 |   // Get the latest schema.
79 |   let latest_schema = api_client.get_latest_schema().expect(
80 |     "Failed to fetch latest schema!",
81 |   );
82 |   let mut last_processed_schema = latest_schema.version.clone();
83 |   let last_processed_schema_res = whiskey.get("last_version_processed".as_bytes());
84 |   if let Ok(new_last_processed_schema_opt) = last_processed_schema_res {
85 |     if let Some(new_last_processed_schema_bytes) = new_last_processed_schema_opt {
86 |       if let Some(new_last_processed_schema) = new_last_processed_schema_bytes.to_utf8() {
87 |         last_processed_schema = new_last_processed_schema.to_owned();
88 |       }
89 |     }
90 |   }
91 | 
92 |   let _: Vec<_> = dumps
93 |     .into_iter()
94 |     .map(|dump| {
95 |       // Check if we're only importing the last dump.
96 |       current_dumps_pos = current_dumps_pos + 1;
97 |       if current_dumps_pos != dumps_len && only_final_dump {
98 |         info!("Skipping dump: {} due to only final selected", dump.dump_id);
99 |         return Ok(());
100 |       }
101 | 
102 |       // Check if another dump has failed importing already.
103 |       if has_errord {
104 |         info!(
105 |           "Skipping dump: {} due to previous failure in import",
106 |           dump.dump_id
107 |         );
108 |         return Err(());
109 |       }
110 | 
111 |       // Check if the dump has finished populating.
112 |       debug!("Entering debug loop for dump: {}", dump.dump_id);
113 |       if !dump.finished {
114 |         info!("Skipping dump: {} because it's not finished.", dump.dump_id);
115 |         return Ok(());
116 |       }
117 | 
118 |       // Check if we've already processed this dump.
119 |       let result = whiskey.get(
120 |         format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
121 |       );
122 |       if result.is_err() {
123 |         error!("Failed to get value from Rocks!");
124 |         error!("{:?}", result.err().unwrap());
125 |         return Err(());
126 |       }
127 |       let is_potentially_processed = result.unwrap();
128 |       if is_potentially_processed.is_some() {
129 |         let potentially_processed = is_potentially_processed.unwrap();
130 |         let potentially_processed = potentially_processed.to_utf8();
131 |         if potentially_processed.is_some() {
132 |           let processed = potentially_processed.unwrap();
133 |           if processed == "successful" || processed == "out-of-date" {
134 |             info!("Skipping already processed dump: {}", dump.dump_id);
135 |             return Ok(());
136 |           }
137 |         }
138 |       }
139 | 
140 |       // Check if the dump queued for import is the correct schema version.
141 |       if latest_schema.version != dump.schema_version {
142 |         let _ = whiskey.put(
143 |           format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
144 |           b"out-of-date",
145 |         );
146 |         return Ok(());
147 |       }
148 | 
149 |       // Get the files for this particular dump.
150 |       let files_in_dump = api_client.get_files_for_dump(dump.dump_id.clone());
151 |       if files_in_dump.is_err() {
152 |         info!("Failed to list files for dump. Skipping...");
153 |         return Ok(());
154 |       }
155 |       let files_in_dump = files_in_dump.unwrap();
156 | 
157 |       // Check if the dump is a historical refresh.
158 |       if api_client.is_historical_refresh(files_in_dump) && settings.get_should_skip_historical_imports() {
159 |         info!(
160 |           "Skipping dump: {} since it's a historical refresh",
161 |           dump.dump_id.clone()
162 |         );
163 |         let _ = whiskey.put(
164 |           format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
165 |           b"successful",
166 |         );
167 |         return Ok(());
168 |       }
169 | 
170 |       // Set that we're attempting to import this.
171 |       let _ = whiskey.put(
172 |         format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
173 |         b"in_progress",
174 |       );
175 | 
176 |       // If we have postgres compatibility, and are configured for postgres, import that.
177 |       if cfg!(feature = "postgres_compat") {
178 |         if settings.get_database_type() == DatabaseType::Psql {
179 |           info!("Connecting to the DB");
180 |           let db_client = db_client::DatabaseClient::<PostgresConnectionManager>::new(&settings)
181 |             .expect("Couldn't setup DB Client");
182 |           let importer = importer::Importer::<DatabaseClient<PostgresConnectionManager>>::new(
183 |             api_client.clone(),
184 |             db_client,
185 |             dump.dump_id.clone(),
186 |             settings.get_save_location(),
187 |           );
188 |           let res = if last_processed_schema.as_str() != latest_schema.version {
189 |             // If this isn't the latest schema, treat every table as volatile to ensure the tables are up to date.
190 |             importer.process(true)
191 |           } else {
192 |             importer.process(settings.get_all_tables_volatile())
193 |           };
194 |           if res.is_ok() {
195 |             let _ = whiskey.put(
196 |               format!("dump_processed_{}", dump.dump_id).as_bytes(),
197 |               b"successful",
198 |             );
199 |             return Ok(());
200 |           } else {
201 |             let _ = whiskey.put(
202 |               format!("dump_processed_{}", dump.dump_id).as_bytes(),
203 |               b"failure",
204 |             );
205 |             return Err(());
206 |           }
207 |         }
208 |       }
209 | 
210 |       // If we have mysql compatibility, and are configured for mysql, import that.
211 | if cfg!(feature = "mysql_compat") { 212 | if settings.get_database_type() == DatabaseType::Mysql { 213 | info!("Connecting to the DB"); 214 | let db_client = db_client::DatabaseClient::::new(&settings) 215 | .expect("Couldn't setup DB Client"); 216 | let importer = importer::Importer::>::new( 217 | api_client.clone(), 218 | db_client, 219 | dump.dump_id.clone(), 220 | settings.get_save_location(), 221 | ); 222 | let res = importer.process(settings.get_all_tables_volatile()); 223 | if res.is_ok() { 224 | let _ = whiskey.put( 225 | format!("dump_processed_{}", dump.dump_id).as_bytes(), 226 | b"successful", 227 | ); 228 | return Ok(()); 229 | } else { 230 | let _ = whiskey.put( 231 | format!("dump_processed_{}", dump.dump_id).as_bytes(), 232 | b"failure", 233 | ); 234 | return Err(()); 235 | } 236 | } 237 | } 238 | 239 | Err(()) 240 | }) 241 | .collect(); 242 | 243 | let _ = whiskey.put( 244 | "last_version_processed".as_bytes(), 245 | latest_schema.version.as_bytes() 246 | ); 247 | 248 | info!("Done!"); 249 | } 250 | -------------------------------------------------------------------------------- /src/mysql_pool/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pool; 2 | pub use self::pool::{CreateManager, MysqlConnectionManager}; 3 | -------------------------------------------------------------------------------- /src/mysql_pool/pool.rs: -------------------------------------------------------------------------------- 1 | use mysql::error::Error as MysqlError; 2 | use mysql::Conn as MysqlBaseConn; 3 | use mysql::Opts as MysqlOpts; 4 | use mysql::OptsBuilder as MysqlOptsBuilder; 5 | use r2d2::ManageConnection as R2D2ManageConnection; 6 | 7 | #[derive(Clone, Debug)] 8 | pub struct MysqlConnectionManager { 9 | params: MysqlOpts, 10 | } 11 | 12 | pub trait CreateManager { 13 | type Manager; 14 | 15 | fn new(params: T) -> Result; 16 | } 17 | 18 | impl CreateManager for MysqlConnectionManager { 19 | type Manager = MysqlConnectionManager; 20 | 21 | fn new(params: MysqlOptsBuilder) -> Result { 22 | Ok(MysqlConnectionManager { params: MysqlOpts::from(params) }) 23 | } 24 | } 25 | 26 | impl<'a> CreateManager<&'a str> for MysqlConnectionManager { 27 | type Manager = MysqlConnectionManager; 28 | 29 | fn new(params: &'a str) -> Result { 30 | Ok(MysqlConnectionManager { params: MysqlOpts::from(params) }) 31 | } 32 | } 33 | 34 | impl R2D2ManageConnection for MysqlConnectionManager { 35 | type Connection = MysqlBaseConn; 36 | type Error = MysqlError; 37 | 38 | fn connect(&self) -> Result { 39 | MysqlBaseConn::new(self.params.clone()) 40 | } 41 | 42 | fn is_valid(&self, conn: &mut MysqlBaseConn) -> Result<(), MysqlError> { 43 | conn.query("SELECT 1;").map(|_| ()) 44 | } 45 | 46 | fn has_broken(&self, conn: &mut MysqlBaseConn) -> bool { 47 | self.is_valid(conn).is_err() 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/settings.rs: -------------------------------------------------------------------------------- 1 | //! Provides the `Settings` Struct for the rest of the crate in order to get 2 | //! configuration values from the environment, or one of several files. 3 | 4 | use config::{Config, File, Environment}; 5 | 6 | /// An Enum of all possible database types. 7 | /// 8 | /// Contains a list of all possible database types that the loader supports. 9 | #[derive(Clone, Debug, Deserialize, Eq, PartialEq)] 10 | pub enum DatabaseType { 11 | /// A type for postgres-like databases. 
12 |   Psql,
13 |   /// A type for mysql-like databases.
14 |   Mysql,
15 | }
16 | 
17 | /// The Database Configuration object.
18 | ///
19 | /// Handles all database configuration values, which in this case is just the connection URL and the database type.
20 | #[derive(Debug, Deserialize)]
21 | struct Database {
22 |   /// The connection URL for the Database.
23 |   pub url: String,
24 |   /// The Type of The Database.
25 |   pub db_type: String,
26 | }
27 | 
28 | /// The Canvas Data API Auth Configuration object.
29 | ///
30 | /// Handles all the configuration values for the Canvas Data API. In this case just the
31 | /// API key + API secret for Canvas Data.
32 | #[derive(Debug, Deserialize)]
33 | struct Canvasdataauth {
34 |   /// The API Key for Canvas Data.
35 |   pub api_key: String,
36 |   /// The API Secret for Canvas Data.
37 |   pub api_secret: String,
38 | }
39 | 
40 | /// The Global Settings object for all configuration values.
41 | #[derive(Debug, Deserialize)]
42 | pub struct Settings {
43 |   /// The database configuration object.
44 |   database: Database,
45 |   /// The Canvas Data API Auth Configuration Object.
46 |   canvasdataauth: Canvasdataauth,
47 |   /// The place to save files.
48 |   save_location: String,
49 |   /// The place to store the RocksDB database.
50 |   rocksdb_location: String,
51 |   /// Whether or not to skip historical imports.
52 |   skip_historical_imports: bool,
53 |   /// Only attempts to load the latest import.
54 |   only_load_final: Option<bool>,
55 |   /// Treats all tables as volatile.
56 |   all_tables_volatile: Option<bool>,
57 | }
58 | 
59 | impl Settings {
60 |   /// Creates a new settings object.
61 |   pub fn new() -> Self {
62 |     let mut base_configuration = Config::new();
63 |     base_configuration
64 |       .merge(File::with_name("config/default"))
65 |       .expect("Could not find default configuration file");
66 | 
67 |     base_configuration
68 |       .merge(File::with_name("config/local").required(false))
69 |       .expect("Transient error getting local configuration.");
70 | 
71 |     let mut env = Environment::with_prefix("cdl");
72 |     env = env.separator("__");
73 |     base_configuration
74 |       .merge(env)
75 |       .expect("Transient error getting environment variables");
76 | 
77 |     base_configuration.try_into().expect(
78 |       "Failed to create base configuration",
79 |     )
80 |   }
81 | 
82 |   /// Gets the save location provided by the settings.
83 |   pub fn get_save_location(&self) -> String {
84 |     self.save_location.clone()
85 |   }
86 | 
87 |   /// Gets the rocksdb location provided by the settings.
88 |   pub fn get_rocksdb_location(&self) -> String {
89 |     self.rocksdb_location.clone()
90 |   }
91 | 
92 |   /// Gets the notion of whether or not to skip historical imports from the settings.
93 |   pub fn get_should_skip_historical_imports(&self) -> bool {
94 |     self.skip_historical_imports
95 |   }
96 | 
97 |   /// Gets the notion of whether or not to only load the final import.
98 |   pub fn get_should_only_load_final(&self) -> bool {
99 |     self.only_load_final.unwrap_or(false)
100 |   }
101 | 
102 |   /// Gets the notion of whether or not to treat all tables as volatile.
103 |   pub fn get_all_tables_volatile(&self) -> bool {
104 |     self.all_tables_volatile.unwrap_or(false)
105 |   }
106 | 
107 |   /// Gets the database url provided by the settings.
108 |   pub fn get_database_url(&self) -> String {
109 |     self.database.url.clone()
110 |   }
111 | 
112 |   /// Gets the database type provided by the settings.
113 |   pub fn get_database_type(&self) -> DatabaseType {
114 |     match self.database.db_type.to_lowercase().as_str() {
115 |       "mysql" => DatabaseType::Mysql,
116 |       _ => DatabaseType::Psql,
117 |     }
118 |   }
119 | 
120 |   /// Gets the Canvas Data API Key provided by the settings.
121 |   pub fn get_canvas_data_api_key(&self) -> String {
122 |     self.canvasdataauth.api_key.clone()
123 |   }
124 | 
125 |   /// Gets the Canvas Data API Secret provided by the settings.
126 |   pub fn get_canvas_data_api_secret(&self) -> String {
127 |     self.canvasdataauth.api_secret.clone()
128 |   }
129 | }
130 | 
--------------------------------------------------------------------------------
/src/type_converter.rs:
--------------------------------------------------------------------------------
1 | //! Manages type conversion from Canvas Data schema types to database types.
2 | 
3 | use errors::*;
4 | use settings::DatabaseType;
5 | 
6 | /// Converts a type from a name to a FRD Database Type.
7 | ///
8 | /// Takes a type from the Canvas Data Schema API, and turns it into the name of the type
9 | /// for the passed in database.
10 | ///
11 | /// * `orig_type` - The Type passed in from the Canvas Data API.
12 | /// * `db_type` - The Database type to convert into.
13 | pub fn convert_type_for_db(orig_type: String, db_type: DatabaseType) -> Result<String> {
14 |   match orig_type.as_str() {
15 |     "bigint" => Ok("BIGINT".to_owned()),
16 |     "boolean" => {
17 |       match db_type {
18 |         DatabaseType::Psql => Ok("BOOLEAN".to_owned()),
19 |         DatabaseType::Mysql => Ok("VARCHAR(10)".to_owned()),
20 |       }
21 |     }
22 |     "double precision" => {
23 |       match db_type {
24 |         DatabaseType::Psql => Ok("double precision".to_owned()),
25 |         DatabaseType::Mysql => Ok("FLOAT(17)".to_owned()),
26 |       }
27 |     }
28 |     "enum" => Ok("TEXT".to_owned()),
29 |     "int" => Ok("INT".to_owned()),
30 |     "integer" => Ok("INT".to_owned()),
31 |     "text" => {
32 |       match db_type {
33 |         DatabaseType::Psql => Ok("TEXT".to_owned()),
34 |         DatabaseType::Mysql => Ok("LONGTEXT".to_owned()),
35 |       }
36 |     }
37 |     "timestamp" => {
38 |       match db_type {
39 |         DatabaseType::Psql => Ok("TIMESTAMP".to_owned()),
40 |         DatabaseType::Mysql => Ok("DATETIME".to_owned()),
41 |       }
42 |     }
43 |     "date" => Ok("DATE".to_owned()),
44 |     "varchar" => {
45 |       match db_type {
46 |         DatabaseType::Psql => Ok("TEXT".to_owned()),
47 |         DatabaseType::Mysql => Ok("LONGTEXT".to_owned()),
48 |       }
49 |     }
50 |     "guid" => {
51 |       match db_type {
52 |         DatabaseType::Psql => Ok("TEXT".to_owned()),
53 |         DatabaseType::Mysql => Ok("LONGTEXT".to_owned()),
54 |       }
55 |     }
56 |     "datetime" => {
57 |       match db_type {
58 |         DatabaseType::Psql => Ok("TIMESTAMP".to_owned()),
59 |         DatabaseType::Mysql => Ok("DATETIME".to_owned()),
60 |       }
61 |     }
62 |     some_random_value => Err(
63 |       ErrorKind::InvalidTypeToConvert(some_random_value.to_owned()).into(),
64 |     ),
65 |   }
66 | }
67 | 
68 | /// Converts a Database Type into a Cast type.
69 | ///
70 | /// Databases can't auto cast strings as other types. So we need to sometimes manually specify
71 | /// "hey cast this string to another type". This function takes in a type of database (postgres, etc)
72 | /// and the type of the column, and turns it into a cast type, or an empty string.
73 | ///
74 | /// * `orig_type` - The type of the column in the database.
75 | /// * `db_type` - The Type of the Database.
76 | pub fn get_cast_as(orig_type: String, db_type: DatabaseType) -> String {
77 |   match db_type {
78 |     DatabaseType::Psql => {
79 |       match orig_type.to_lowercase().as_str() {
80 |         "bigint" => "int8".to_owned(),
81 |         "boolean" => "boolean".to_owned(),
82 |         "double precision" => "double precision".to_owned(),
83 |         "int" => "int".to_owned(),
84 |         "timestamp" => "timestamp".to_owned(),
85 |         _ => "".to_owned(),
86 |       }
87 |     }
88 |     DatabaseType::Mysql => {
89 |       match orig_type.to_lowercase().as_str() {
90 |         "bigint" => "SIGNED".to_owned(),
91 |         "int" => "SIGNED".to_owned(),
92 |         "float(17)" => "DECIMAL(34, 17)".to_owned(),
93 |         "datetime" => "DATETIME".to_owned(),
94 |         "date" => "DATE".to_owned(),
95 |         _ => "".to_owned(),
96 |       }
97 |     }
98 |   }
99 | }
100 | 
--------------------------------------------------------------------------------
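
For readers extending the loader, the two helpers in `src/type_converter.rs` are meant to be used together: `convert_type_for_db` maps a Canvas Data schema type to a column type for the configured database, and `get_cast_as` supplies the type a raw TSV string should be `CAST` to, or an empty string when no cast is needed. The sketch below shows one way they compose; the `build_create_table` and `cast_expr` helpers and the sample SQL shapes are illustrative assumptions and are not part of the crate itself.

```rust
use errors::*;
use settings::DatabaseType;
use type_converter::{convert_type_for_db, get_cast_as};

/// Illustrative only: build a CREATE TABLE statement from (column, canvas_type) pairs,
/// converting each Canvas Data schema type with the same helper the importer relies on.
fn build_create_table(table: &str, cols: &[(&str, &str)], db: &DatabaseType) -> Result<String> {
  let mut defs = Vec::new();
  for (name, canvas_type) in cols {
    // e.g. "timestamp" becomes TIMESTAMP on Psql and DATETIME on Mysql.
    let db_type = convert_type_for_db(canvas_type.to_string(), db.clone())?;
    defs.push(format!("{} {}", name, db_type));
  }
  Ok(format!("CREATE TABLE IF NOT EXISTS {} ({})", table, defs.join(", ")))
}

/// Illustrative only: wrap a raw TSV value in a CAST when the column type needs one.
fn cast_expr(raw: &str, column_db_type: &str, db: DatabaseType) -> String {
  let cast = get_cast_as(column_db_type.to_string(), db);
  if cast.is_empty() {
    // No cast type registered, so the plain quoted string is used as-is.
    format!("'{}'", raw)
  } else {
    // e.g. CAST('42' AS SIGNED) for a BIGINT column on MySQL.
    format!("CAST('{}' AS {})", raw, cast)
  }
}
```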