├── .dockerignore
├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── Dockerfile
├── LICENSE
├── README.md
├── appveyor.yml
├── circle.yml
├── config
│   └── default.toml
├── rustfmt.toml
└── src
    ├── api_client.rs
    ├── db_client.rs
    ├── errors.rs
    ├── importer.rs
    ├── main.rs
    ├── mysql_pool
    │   ├── mod.rs
    │   └── pool.rs
    ├── settings.rs
    └── type_converter.rs

/.dockerignore:
--------------------------------------------------------------------------------
1 | target/
2 | **/*.rs.bk
3 | config/local.toml
4 | Cargo.lock
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Please read the following carefully before opening a new issue.
2 | Your issue may be closed if it does not provide the information required by this template.
3 | 
4 | We use GitHub Issues, as well as our help email at canvasdatahelp@instructure.com, for tracking issues. Note that while this project is not officially supported by Instructure, the Canvas Data support team will ensure it stays in a working state.
5 | 
6 | - If you have a question about how to use the CLI, please ask your CSM or email canvasdatahelp@instructure.com.
7 | - If you have a feature request, please post it on the community site: [HERE][community_link]
8 | 
9 | - Make sure your issue reproduces on the latest version!
10 | 
11 | --- Delete everything above this line ---
12 | 
13 | ### Description ###
14 | 
15 | Explain what you did, what you expected to happen, and what actually happened.
16 | 
17 | ### Additional Information ###
18 | 
19 | * Rust Version: [FILL THIS OUT: can be grabbed with `rustc --version` on your CLI.]
20 | * Platform: [FILL THIS OUT: Windows, Mac, or Linux? Which version?]
21 | * Logs: (If you can, please run the CLI with `RUST_LOG=trace` set and provide us the debug logs.)
22 | 
23 | [community_link]: https://community.canvaslms.com/community/answers/data
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Thanks for submitting a PR! We want to make contributing to the Canvas Data Loader as easy as possible.
2 | Please read these instructions carefully:
3 | 
4 | - [ ] Explain the **motivation** for making this change.
5 | - [ ] Provide a **test plan** demonstrating that the code is solid.
6 | - [ ] Match the **code formatting** of the rest of the codebase.
7 | - [ ] Make sure to **add tests** to help keep code coverage up.
8 | 
9 | ## Motivation (required) ##
10 | 
11 | What existing problem does the pull request solve?
12 | 
13 | ## Test Plan (required) ##
14 | 
15 | A good test plan includes the exact commands you ran and their output.
16 | 
17 | If you have added code that should be tested, add tests.
18 | 
19 | ## Next Steps ##
20 | 
21 | - Small pull requests are much easier to review and more likely to get merged. Make sure the PR does only one thing; otherwise, please split it.
22 | - Make sure all **tests pass**. We will run automated tests, but you can run them yourself with `cargo test`.
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | **/*.rs.bk 3 | config/local.toml 4 | Cargo.lock -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.3.0 2 | 3 | * Add Option to only import latest files. 4 | * Update Dependencies. 5 | * Add Comments. 6 | 7 | ## 0.2.0 8 | 9 | * Support for MySQL Databases. 10 | 11 | ## 0.1.0 12 | 13 | * Initial Release of Canvas Data Loader. 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cdl-runner" 3 | version = "0.4.0" 4 | authors = [ 5 | "Canvas Data Support Team ", 6 | "Instructure Engineering ", 7 | "Eric Coan " 8 | ] 9 | 10 | [features] 11 | default = [ "postgres_compat", "mysql_compat" ] 12 | postgres_compat = [ "postgres", "r2d2_postgres" ] 13 | mysql_compat = [ "mysql" ] 14 | 15 | [dependencies] 16 | base64 = "^0.9" 17 | chrono = { version = "^0.4", features = [ "serde" ] } 18 | config = "^0.9" 19 | error-chain = "^0.12" 20 | env_logger = "^0.5" 21 | flate2 = { version = "^1.0", features = ["zlib"], default-features = false } 22 | futures = "^0.1" 23 | log = "^0.4" 24 | glob = "^0.2" 25 | lazy_static = "^1.1" 26 | r2d2 = "^0.8" 27 | rayon = "^1.0.2" 28 | regex = "^1.0" 29 | reqwest = "^0.9" 30 | ring = "^0.13" 31 | rocksdb = "^0.10" 32 | serde = "^1.0" 33 | serde_derive = "^1.0" 34 | serde_json = "^1.0" 35 | tokio-core = "^0.1" 36 | 37 | # Postgres Deps 38 | postgres = { version = "^0.15", optional = true } 39 | r2d2_postgres = { version = "^0.14", optional = true } 40 | 41 | # Mysql Deps 42 | mysql = { version = "^14", optional = true, features = ["ssl"] } 43 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:latest as cdl-build 2 | 3 | RUN apt-get update && apt-get -y --no-install-recommends install clang && apt-get clean && rm -rf /var/lib/apt/lists/* 4 | 5 | # Cache dependencies for faster builds 6 | RUN cargo install cargo-build-deps 7 | RUN cd /tmp && USER=root cargo new --bin canvas-data-loader 8 | WORKDIR /tmp/canvas-data-loader 9 | COPY Cargo.toml ./ 10 | RUN cargo-build-deps --release 11 | 12 | # Copy in our source and build it 13 | COPY src /tmp/canvas-data-loader/src 14 | RUN cargo build --release 15 | 16 | # Start a new build from a minimal image that we can copy the binary into 17 | FROM debian:stretch-slim 18 | 19 | RUN apt-get update && apt-get -y --no-install-recommends install libssl1.1 ca-certificates && apt-get clean && rm -rf /var/lib/apt/lists/* 20 | COPY --from=cdl-build /tmp/canvas-data-loader/target/release/cdl-runner . 
21 | COPY ./config ./config
22 | 
23 | ENV RUST_LOG info
24 | CMD ./cdl-runner
25 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 | 
3 | Copyright (c) 2017 Instructure
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Canvas Data Loader #
2 | 
3 | This is the source code for the Canvas Data Loader, an example application that downloads your
4 | Canvas Data dumps and imports them into a database. The process is completely automated and is
5 | able to handle historical refreshes, schema changes, and the 24-36 hour variance in dump delivery
6 | without issue.
7 | 
8 | It should be noted that although there are better options out there, there is no reason you couldn't
9 | use the loader to handle all of your imports every day. The Canvas Data Loader could, for example,
10 | handle your imports at first, and later be handed off to a more stable process.
11 | 
12 | ## Support ##
13 | 
14 | Although this lives under the Instructure repo, it is purely an example application, and as such is not fully supported by Instructure.
15 | 
16 | However, the Canvas Data Support team is happy to field questions about usage at the standard canvasdatahelp@instructure.com address.
17 | 
18 | ## How Do I Use It? ##
19 | 
20 | The following instructions are for a Linux server, but steps 1-5 should work universally.
21 | You'll just need to use your system's way of scheduling a repeating task instead of cron if you
22 | are not using Linux.
23 | 
24 | * Clone this repository.
25 | * Copy the default configuration, and modify it to your needs:
26 |   * `cp ./config/default.toml ./config/local.toml`
27 |   * `my_text_editor ./config/local.toml`
28 | * Choose a home for the importer, and copy this repository there.
29 | * [Install Rust](https://www.rust-lang.org/en-US/install.html)
30 | * Build a release version: `cargo build --release`.
31 | * Set up a crontab to run the importer every hour:
32 |   * `crontab -e`
33 |   * Enter on its own line, filling in the path to your importer: `0 * * * * cd <path_to_importer> && RUST_LOG=info ./target/release/cdl-runner > /var/log/cdl-log 2>&1`
34 | * Tadah!
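For a rough sense of what each scheduled run actually does, the sketch below strings together the crate's own `Settings`, `CanvasDataApiClient`, `DatabaseClient`, and `Importer` types (all shown later in `src/`). It is illustrative only: the `run_once` name is hypothetical, the Postgres adapter is hard-coded where the real `main.rs` picks the adapter from `database.db_type`, and the bookkeeping around schema versions and already-imported dumps is omitted.

```rust
// Illustrative sketch only; not the literal main.rs. `run_once` is a
// hypothetical helper that shows roughly what one cdl-runner invocation does.
fn run_once() -> errors::Result<()> {
  // Read the configuration (default.toml / local.toml / environment overrides).
  let settings = settings::Settings::new();
  let api_client = api_client::CanvasDataApiClient::new(&settings);

  // Postgres is used here for illustration; the real code chooses the adapter
  // based on the `db_type` setting.
  let db = db_client::DatabaseClient::<r2d2_postgres::PostgresConnectionManager>::new(&settings)?;

  // List every dump the account can see, oldest first, and import the
  // finished ones in order.
  let mut dumps = api_client.get_dumps()?;
  dumps.sort_by(|a, b| a.created_at.cmp(&b.created_at));
  for dump in dumps.into_iter().filter(|d| d.finished) {
    importer::Importer::new(
      api_client.clone(),
      db.clone(),
      dump.dump_id.clone(),
      settings.get_save_location(),
    ).process(false)?; // `false` keeps per-table diffing; see `all_tables_volatile`.
  }
  Ok(())
}
```

Each `Importer::process` call downloads the dump's gzipped TSV files, creates any missing tables from the published schema, and then either diffs rows in place or drops and reloads the table, depending on whether it is in the volatile-table list.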
35 | 
36 | ### Configuration Using Environment Variables
37 | 
38 | Configuration can also be done using environment variables instead of, or in addition to, the `./config/local.toml` file. For example, you may wish to use environment variables for the API key/secret and use the file for the remaining configuration.
39 | 
40 | Example:
41 | 
42 | `export cdl__canvasdataauth__api_key=abcdefg123456`
43 | `export cdl__canvasdataauth__api_secret=123456abcdefg`
44 | 
45 | Possible environment variables:
46 | 
47 | - `cdl__canvasdataauth__api_key`
48 | - `cdl__canvasdataauth__api_secret`
49 | - `cdl__database__db_type`
50 | - `cdl__database__url`
51 | - `cdl__only_load_final`
52 | - `cdl__rocksdb_location`
53 | - `cdl__save_location`
54 | - `cdl__skip_historical_imports`
55 | 
56 | ## License ##
57 | 
58 | The Canvas Data Loader is licensed under the MIT License.
59 | 
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | # Appveyor configuration template for Rust using rustup for Rust installation
2 | # https://github.com/starkat99/appveyor-rust
3 | 
4 | ## Operating System (VM environment) ##
5 | 
6 | # Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
7 | os: Visual Studio 2015
8 | 
9 | ## Build Matrix ##
10 | environment:
11 |   matrix:
12 | 
13 | ### MSVC Toolchains ###
14 | 
15 |   # Stable 64-bit MSVC
16 |   - channel: stable
17 |     target: x86_64-pc-windows-msvc
18 |   # Stable 32-bit MSVC
19 |   - channel: stable
20 |     target: i686-pc-windows-msvc
21 | 
22 | ### GNU Toolchains ###
23 | 
24 |   # Stable 64-bit GNU
25 |   - channel: stable
26 |     target: x86_64-pc-windows-gnu
27 |   # Stable 32-bit GNU
28 |   - channel: stable
29 |     target: i686-pc-windows-gnu
30 | 
31 | ## Install Script ##
32 | install:
33 |   - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
34 |   - rustup-init -yv --default-toolchain %channel% --default-host %target%
35 |   - set PATH=%PATH%;%USERPROFILE%\.cargo\bin
36 |   - rustc -vV
37 |   - cargo -vV
38 | 
39 | ## Build Script ##
40 | build: false
41 | test_script:
42 |   - cargo build --verbose %cargoflags%
--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | executorType: docker
4 | containerInfo:
5 |   - image: jimmycuadra/rust:latest
6 | stages:
7 |   build:
8 |     workDir: /source
9 |     steps:
10 |       - type: shell
11 |         shell: /bin/bash
12 |         command: apt-get update
13 |       - type: shell
14 |         shell: /bin/bash
15 |         command: apt-get install openssh-client -y
16 |       - type: checkout
17 |       - type: shell
18 |         shell: /bin/bash
19 |         command: cargo build
20 | 
--------------------------------------------------------------------------------
/config/default.toml:
--------------------------------------------------------------------------------
1 | # Locations for downloaded dumps and the local RocksDB state store.
2 | save_location = "/tmp/cdl-save"
3 | rocksdb_location = "/tmp/cdl-rocksdb"
4 | skip_historical_imports = true
5 | # Uncomment the line below to only import the latest dump. Note: you may miss dumps on
6 | # days where we do historical refreshes. It's good for a first import.
7 | # only_load_final = true
8 | 
9 | # Uncomment the line below to value import speed over availability of data.
10 | # Specifically, the CDL will drop tables and reimport them from scratch every time,
11 | # no matter the table, instead of doing targeted DELETE/INSERTs.
12 | # all_tables_volatile = true 13 | 14 | [canvasdataauth] 15 | api_key = "FILL_ME_OUT" 16 | api_secret = "FILL_ME_OUT" 17 | 18 | [database] 19 | url = "postgres://localhost/canvas_data_loader" 20 | # Valid Values are Psql, Mysql 21 | db_type = "Psql" 22 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 2 2 | max_width = 120 -------------------------------------------------------------------------------- /src/api_client.rs: -------------------------------------------------------------------------------- 1 | //! Provides an API Client for the Canvas Data API. 2 | 3 | use base64::encode as B64Encode; 4 | use chrono::prelude::*; 5 | use errors::*; 6 | use rayon::prelude::*; 7 | use regex::Regex; 8 | use reqwest::{Client as HttpClient, Method, Request}; 9 | use reqwest::header::HeaderValue; 10 | use ring::{digest, hmac}; 11 | use settings::Settings; 12 | use std::collections::BTreeMap; 13 | use std::fs::{self, File}; 14 | use std::io; 15 | use std::path::Path; 16 | 17 | lazy_static! { 18 | static ref REQREG: Regex = Regex::new(r"^requests.*?$").expect("Invalid Static Requests Regex"); 19 | } 20 | 21 | /// The API Client for Canvas Data. 22 | #[derive(Clone)] 23 | pub struct CanvasDataApiClient { 24 | /// The API Key to use for Canvas Data. 25 | api_key: String, 26 | /// The API Secret to use for Canvas Data. 27 | api_secret: String, 28 | /// The place to save files. 29 | save_location: String, 30 | /// The Reqwest Client, 31 | client: HttpClient, 32 | } 33 | 34 | impl CanvasDataApiClient { 35 | /// Creates a new Canvas Data API Client. 36 | /// 37 | /// Creates a Canvas Data API Client that talks to the core portal.inshosteddata.com. 38 | /// 39 | /// * `settings` - The settings to use for this API Client. 40 | pub fn new(settings: &Settings) -> Self { 41 | CanvasDataApiClient { 42 | api_key: settings.get_canvas_data_api_key(), 43 | api_secret: settings.get_canvas_data_api_secret(), 44 | save_location: settings.get_save_location(), 45 | client: HttpClient::new(), 46 | } 47 | } 48 | 49 | /// Computes the authorization header. 50 | /// 51 | /// Computes the authorization header needed for authenticating to the Canvas Data API. 52 | /// 53 | /// * `http_method` - The HTTP Method you're using. 54 | /// * `host` - The Host Header you're using. 55 | /// * `content_type` - The Content Type you're using. 56 | /// * `content_md5` - The Content MD5 Header you're sending. 57 | /// * `path` - The path of your request. 58 | /// * `query_params` - The query parameters of your request. 59 | /// * `date_header` - The Date Header you're using. 60 | pub fn compute_auth_header( 61 | &self, 62 | http_method: &str, 63 | host: &str, 64 | content_type: &str, 65 | content_md5: &str, 66 | path: &str, 67 | query_params: &str, 68 | date_header: &str, 69 | ) -> String { 70 | 71 | let pre_sign = 72 | format!( 73 | "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}", 74 | http_method, 75 | host, 76 | content_type, 77 | content_md5, 78 | path, 79 | query_params, 80 | date_header, 81 | self.api_secret, 82 | ); 83 | debug!("Compute Auth Header was passed: {:?}", pre_sign); 84 | 85 | let signing_key = hmac::SigningKey::new(&digest::SHA256, self.api_secret.clone().as_bytes()); 86 | let output = hmac::sign(&signing_key, pre_sign.as_bytes()); 87 | let encoded_val = B64Encode(&output); 88 | format!("HMACAuth {}:{}", self.api_key, encoded_val) 89 | } 90 | 91 | /// Gets the current date. 
92 | /// 93 | /// Gets the current date in the format needed for compute_auth_header. 94 | pub fn get_current_date(&self) -> String { 95 | Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string() 96 | } 97 | 98 | /// Determines if a dump is a historical refresh based on the files in dump response. 99 | /// 100 | /// * `resp` - The Files in dump response to check. 101 | pub fn is_historical_refresh(&self, resp: FilesInDumpResponse) -> bool { 102 | let mut has_found_all_requests_table = true; 103 | 'outer: for artifact in resp.artifacts_by_table.values() { 104 | for file in artifact.files.iter() { 105 | if !REQREG.is_match(&file.filename) { 106 | has_found_all_requests_table = false; 107 | break 'outer; 108 | } 109 | } 110 | } 111 | has_found_all_requests_table 112 | } 113 | 114 | /// Gets a current list of Dumps for your Canvas Data Instance. 115 | pub fn get_dumps(&self) -> Result> { 116 | trace!("Get Dumps was called."); 117 | 118 | let uri = try!("https://portal.inshosteddata.com/api/account/self/dump".parse()); 119 | let mut req: Request = Request::new(Method::GET, uri); 120 | let date_str = self.get_current_date(); 121 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Couldn't turn string into header value!")); 122 | req.headers_mut().insert( 123 | "Content-Type", 124 | HeaderValue::from_static("application/json"), 125 | ); 126 | req.headers_mut().insert( 127 | "Authorization", 128 | HeaderValue::from_str(&self.compute_auth_header( 129 | "GET", 130 | "portal.inshosteddata.com", 131 | "application/json", 132 | "", 133 | "/api/account/self/dump", 134 | "", 135 | &date_str, 136 | )).expect("Couldn't turn string into header value!"), 137 | ); 138 | 139 | Ok(try!(self.client.execute(req).and_then(|mut res| { 140 | res.json() 141 | }).map_err(|e| { 142 | io::Error::new(io::ErrorKind::Other, e) 143 | }))) 144 | } 145 | 146 | /// Gets the latest schema. 147 | pub fn get_latest_schema(&self) -> Result { 148 | trace!("Get latest schema was called"); 149 | 150 | let uri = try!("https://portal.inshosteddata.com/api/schema/latest".parse()); 151 | let mut req: Request = Request::new(Method::GET, uri); 152 | let date_str = self.get_current_date(); 153 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Failed to turn string into header value!")); 154 | req.headers_mut().insert( 155 | "Content-Type", 156 | HeaderValue::from_static("application/json"), 157 | ); 158 | req.headers_mut().insert( 159 | "Authorization", 160 | HeaderValue::from_str(&self.compute_auth_header( 161 | "GET", 162 | "portal.inshosteddata.com", 163 | "application/json", 164 | "", 165 | "/api/schema/latest", 166 | "", 167 | &date_str, 168 | )).expect("Failed to turn string into header value!"), 169 | ); 170 | 171 | Ok(try!(self.client.execute(req).and_then(|mut res| { 172 | res.json() 173 | }).map_err(|e| { 174 | io::Error::new(io::ErrorKind::Other, e) 175 | }))) 176 | } 177 | 178 | /// Gets the Table Definition for a Specific Table. 179 | /// 180 | /// * `table_name` - The Table name to get the definition for. 
181 | pub fn get_table_definition(&self, table_name: String) -> Result> { 182 | trace!("get_table_definition was called for: [ {} ]", table_name); 183 | 184 | let uri = try!("https://portal.inshosteddata.com/api/schema/latest".parse()); 185 | let mut req: Request = Request::new(Method::GET, uri); 186 | let date_str = self.get_current_date(); 187 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Failed to turn string into headervalue!")); 188 | req.headers_mut().insert( 189 | "Content-Type", 190 | HeaderValue::from_static("application/json"), 191 | ); 192 | req.headers_mut().insert( 193 | "Authorization", 194 | HeaderValue::from_str(&self.compute_auth_header( 195 | "GET", 196 | "portal.inshosteddata.com", 197 | "application/json", 198 | "", 199 | "/api/schema/latest", 200 | "", 201 | &date_str, 202 | )).expect("Failed to turn string into headervalue!"), 203 | ); 204 | 205 | Ok(try!(self.client.execute(req).and_then(|mut res| { 206 | res.json() 207 | }).map(|res: SchemaDefinition| { 208 | let mut ret = None; 209 | 210 | for table_def in res.schema.values() { 211 | if table_def.table_name.to_lowercase() == table_name { 212 | ret = Some(table_def.clone()); 213 | break; 214 | } 215 | } 216 | 217 | ret 218 | }).map_err(|e| { 219 | io::Error::new(io::ErrorKind::Other, e) 220 | }))) 221 | } 222 | 223 | /// Gets the list of files for a specific dump. 224 | /// 225 | /// * `dump_id` - The Dump ID to grab the list of files for. 226 | pub fn get_files_for_dump(&self, dump_id: String) -> Result { 227 | trace!( 228 | "Get files for dump was called with dump id: [ {} ]", 229 | dump_id 230 | ); 231 | 232 | let path = format!("/api/account/self/file/byDump/{}", dump_id); 233 | let uri = try!(format!("https://portal.inshosteddata.com{}", &path).parse()); 234 | let mut req: Request = Request::new(Method::GET, uri); 235 | let date_str = self.get_current_date(); 236 | req.headers_mut().insert("Date", HeaderValue::from_str(&date_str).expect("Failed to turn string into header value!")); 237 | req.headers_mut().insert( 238 | "Content-Type", 239 | HeaderValue::from_static("application/json"), 240 | ); 241 | req.headers_mut().insert( 242 | "Authorization", 243 | HeaderValue::from_str(&self.compute_auth_header( 244 | "GET", 245 | "portal.inshosteddata.com", 246 | "application/json", 247 | "", 248 | &path, 249 | "", 250 | &date_str, 251 | )).expect("Failed to turn string into headervalue!"), 252 | ); 253 | 254 | Ok(try!(self.client.execute(req).and_then(|mut res| { 255 | res.json() 256 | }).map_err(|e| { 257 | io::Error::new(io::ErrorKind::Other, e) 258 | }))) 259 | } 260 | 261 | /// Download all files for a specific dump. 262 | /// 263 | /// * `dump_id` - The Dump ID of the files to download. 
264 | pub fn download_files_for_dump(&self, dump_id: String) -> Result<()> { 265 | trace!( 266 | "Download files for dump was called with dump id: [ {} ]", 267 | dump_id 268 | ); 269 | let save_location = format!("{}/{}", self.save_location, &dump_id); 270 | try!(fs::create_dir_all(save_location.clone())); 271 | let files_in_dump = try!(self.get_files_for_dump(dump_id.clone())); 272 | 273 | files_in_dump.artifacts_by_table.par_iter().map(move |(_, table_artifact)| { 274 | for file_to_download in table_artifact.files.iter().cloned() { 275 | let finalized_to_download_path = format!("{}/{}", &save_location, &file_to_download.filename); 276 | let cloned_download_path = finalized_to_download_path.clone(); 277 | let path = Path::new(&finalized_to_download_path); 278 | if path.exists() { 279 | debug!( 280 | "{:?} exists, skipping entire artifact", 281 | cloned_download_path 282 | ); 283 | // Assume the entire artifact is downloaded. 284 | continue; 285 | } else { 286 | debug!( 287 | "{:?} does not exist, downloading files", 288 | cloned_download_path 289 | ); 290 | let uri = file_to_download.url.parse().expect("Failed to parse file url form hosted-data!"); 291 | let req = Request::new(Method::GET, uri); 292 | self.client.execute(req).map(move |mut res| { 293 | let download_path = cloned_download_path; 294 | let mut file = File::create(Path::new(&download_path)).expect("Failed to create download file!"); 295 | 296 | res.copy_to(&mut file).expect("Failed to copy to file!") 297 | }).expect("Failed to download table!"); 298 | } 299 | } 300 | }).count(); 301 | 302 | trace!("Done Downloading Files for: {}", dump_id); 303 | 304 | Ok(()) 305 | } 306 | } 307 | 308 | /// Represents a Dump returned from the list dumps endpoint. 309 | #[derive(Clone, Debug, Deserialize)] 310 | pub struct DumpInList { 311 | /// The ID of this particular Dumpm. 312 | #[serde(rename = "dumpId")] 313 | pub dump_id: String, 314 | /// The Sequence number of this dump. 315 | pub sequence: i64, 316 | /// The Account ID this dump is for. 317 | #[serde(rename = "accountId")] 318 | pub account_id: String, 319 | /// The Number of Files this dump is reporting. 320 | #[serde(rename = "numFiles")] 321 | pub num_files: i64, 322 | /// If this dump is finished. 323 | pub finished: bool, 324 | /// When this dump is set to expire. 325 | pub expires: i64, 326 | /// When this dump was last updated. 327 | #[serde(rename = "updatedAt")] 328 | pub updated_at: DateTime, 329 | /// When this dump was created. 330 | #[serde(rename = "createdAt")] 331 | pub created_at: DateTime, 332 | /// The Schema Version this dump is using. 333 | #[serde(rename = "schemaVersion")] 334 | pub schema_version: String, 335 | } 336 | unsafe impl Send for DumpInList {} 337 | 338 | /// The list of files returned from a file in dump response. 339 | #[derive(Clone, Debug, Deserialize)] 340 | pub struct FilesInDumpResponse { 341 | /// The Account ID these files are for. 342 | #[serde(rename = "accountId")] 343 | pub account_id: String, 344 | /// When these files expire. 345 | pub expires: i64, 346 | /// The sequence of the dump these files are apart of. 347 | pub sequence: i64, 348 | /// When these files were last updated. 349 | #[serde(rename = "updatedAt")] 350 | pub updated_at: DateTime, 351 | /// The schema version these files are at. 352 | #[serde(rename = "schemaVersion")] 353 | pub schema_version: String, 354 | /// The number of files that exist. 355 | #[serde(rename = "numFiles")] 356 | pub num_files: i64, 357 | /// When the dump was created these files are apart of. 
358 | #[serde(rename = "createdAt")] 359 | pub created_at: DateTime, 360 | /// The Dump ID these files are related to. 361 | #[serde(rename = "dumpId")] 362 | pub dump_id: String, 363 | /// Whether the dump is finished or not. 364 | pub finished: bool, 365 | /// A list of the "artifacts" or files per table. 366 | #[serde(rename = "artifactsByTable")] 367 | pub artifacts_by_table: BTreeMap, 368 | } 369 | unsafe impl Send for FilesInDumpResponse {} 370 | 371 | /// A list of artifacts per table. 372 | #[derive(Clone, Debug, Deserialize)] 373 | pub struct ArtifactByTable { 374 | /// The table name these artifacts are apart of. 375 | #[serde(rename = "tableName")] 376 | pub table_name: String, 377 | /// Whether or not this is a partial table. 378 | pub partial: bool, 379 | /// A List of files for this table. 380 | pub files: Vec, 381 | } 382 | unsafe impl Send for ArtifactByTable {} 383 | 384 | /// A File object returned in ArtifactsByTable. 385 | #[derive(Clone, Debug, Deserialize)] 386 | pub struct BasicFile { 387 | /// The URL for this file to download from. 388 | pub url: String, 389 | /// The filename of this file. 390 | pub filename: String, 391 | } 392 | unsafe impl Send for BasicFile {} 393 | 394 | /// The Schema Definition returned by Canvas Data. 395 | #[derive(Clone, Debug, Deserialize)] 396 | pub struct SchemaDefinition { 397 | /// The Version of the schema. 398 | pub version: String, 399 | /// The Actual Schema Object itself. 400 | pub schema: BTreeMap, 401 | } 402 | unsafe impl Send for SchemaDefinition {} 403 | 404 | /// A Definition for a Table returned by the Schema API. 405 | #[derive(Clone, Debug, Deserialize)] 406 | pub struct TableDefinition { 407 | /// The DW Type (dimension, or fact). 408 | pub dw_type: String, 409 | /// An optional Description of the table. 410 | pub description: Option, 411 | /// Any hints about how a table, almost always empty, may occasionally provide a sort key, or something of the like. 412 | pub hints: BTreeMap, 413 | /// Whther this table is incremental. 414 | pub incremental: bool, 415 | /// The table name of this table, 416 | #[serde(rename = "tableName")] 417 | pub table_name: String, 418 | /// A List of it's columns. 419 | pub columns: Vec, 420 | } 421 | unsafe impl Send for TableDefinition {} 422 | 423 | /// A Definition for a Column returned by the Schema API. 424 | #[derive(Clone, Debug, Deserialize)] 425 | pub struct ColumnDefinition { 426 | /// The Type this column is. 427 | #[serde(rename = "type")] 428 | pub db_type: String, 429 | /// An optional description of this column. 430 | pub description: Option, 431 | /// The name of this column/ 432 | pub name: String, 433 | /// An optional length to apply to this column. 434 | pub length: Option, 435 | /// Optional information about the dimension. 436 | pub dimension: Option, 437 | } 438 | unsafe impl Send for ColumnDefinition {} 439 | 440 | /// Dimension information returned by the Schema API. 441 | #[derive(Clone, Debug, Deserialize)] 442 | pub struct DimensionDefinition { 443 | /// The name of this dimension. 444 | pub name: String, 445 | /// The ID of this dimension. 446 | pub id: String, 447 | /// An optional role to attach to this dimension. 448 | pub role: Option, 449 | } 450 | unsafe impl Send for DimensionDefinition {} 451 | -------------------------------------------------------------------------------- /src/db_client.rs: -------------------------------------------------------------------------------- 1 | //! Provides the Database Client for the CDL Runner. 2 | //! 
This will control all the connections/inserts/updates/etc. 3 | 4 | use errors::*; 5 | use r2d2::{ManageConnection, Pool}; 6 | use std::clone::Clone; 7 | use std::collections::BTreeMap; 8 | use settings::{DatabaseType, Settings}; 9 | use type_converter::get_cast_as; 10 | 11 | #[cfg(feature = "postgres_compat")] 12 | use r2d2_postgres::{TlsMode, PostgresConnectionManager}; 13 | 14 | #[cfg(feature = "mysql_compat")] 15 | use mysql_pool::{CreateManager, MysqlConnectionManager}; 16 | 17 | /// The Database Client Structure. 18 | pub struct DatabaseClient { 19 | /// The Type of the Database. 20 | pub db_type: DatabaseType, 21 | /// The Underlying Connection Pool. 22 | underlying_pool: Pool, 23 | } 24 | 25 | impl Clone for DatabaseClient { 26 | fn clone(&self) -> DatabaseClient { 27 | DatabaseClient { 28 | db_type: self.db_type.clone(), 29 | underlying_pool: self.underlying_pool.clone(), 30 | } 31 | } 32 | } 33 | 34 | /// Something the importer can use to talk to the database. 35 | pub trait ImportDatabaseAdapter { 36 | /// Gets the Database Type. 37 | fn get_db_type(&self) -> DatabaseType; 38 | 39 | /// Drops a Table in the Database. 40 | /// 41 | /// * `table_name` - The Table name to Drop. 42 | fn drop_table(&self, table_name: String) -> Result<()>; 43 | 44 | /// Creates a Table in the Database. 45 | /// 46 | /// * `table_name` - The Table name to Create. 47 | /// * `columns` - The column definition to create . 48 | fn create_table(&self, table_name: String, columns: BTreeMap) -> Result<()>; 49 | 50 | /// Drops a Record in the Database. 51 | /// 52 | /// * `table_name` - The Table Name to drop from. 53 | /// * `column_types` - The types of columns 54 | /// * `column_name` - The column name to use in the WHERE clause. 55 | /// * `value` - The columnv value to use in the WHERE clause. 56 | fn drop_record( 57 | &self, 58 | table_name: String, 59 | column_types: BTreeMap, 60 | column_name: String, 61 | value: String, 62 | ) -> Result<()>; 63 | 64 | /// Inserts a Record into the Database. 65 | /// 66 | /// * `table_name` - The table name to insert the record into. 67 | /// * `columns` - The columns to insert into the table . 68 | /// * `column_types` - The types of columns to use. 69 | fn insert_record( 70 | &self, 71 | table_name: String, 72 | column_types: BTreeMap, 73 | columns: BTreeMap>, 74 | ) -> Result<()>; 75 | } 76 | 77 | #[cfg(feature = "postgres_compat")] 78 | impl DatabaseClient { 79 | /// Creates a New Database Client for Postgres. 80 | /// 81 | /// `settings` - The underlying settings object to configure ourselves with. 82 | pub fn new(settings: &Settings) -> Result> { 83 | let manager = PostgresConnectionManager::new(settings.get_database_url(), TlsMode::None); 84 | if manager.is_err() { 85 | return Err(ErrorKind::PostgresErr.into()); 86 | } 87 | let manager = manager.unwrap(); 88 | let pool = Pool::new(manager).expect( 89 | "Failed to turn connection into pool. This should never happen", 90 | ); 91 | Ok(DatabaseClient:: { 92 | db_type: DatabaseType::Psql, 93 | underlying_pool: pool, 94 | }) 95 | } 96 | } 97 | 98 | #[cfg(feature = "mysql_compat")] 99 | impl DatabaseClient { 100 | /// Creates a New Database Client for Mysql. 101 | /// 102 | /// `settings` - The underlying settings object to configure ourselves with. 
103 | pub fn new(settings: &Settings) -> Result> { 104 | let manager = MysqlConnectionManager::new(settings.get_database_url().as_str()); 105 | if manager.is_err() { 106 | return Err(ErrorKind::MysqlErr.into()); 107 | } 108 | let manager = manager.unwrap(); 109 | let pool = Pool::new(manager).expect( 110 | "Failed to turn a connection into pool. This should never happen", 111 | ); 112 | Ok(DatabaseClient:: { 113 | db_type: DatabaseType::Mysql, 114 | underlying_pool: pool, 115 | }) 116 | } 117 | } 118 | 119 | #[cfg(feature = "postgres_compat")] 120 | impl ImportDatabaseAdapter for DatabaseClient { 121 | fn get_db_type(&self) -> DatabaseType { 122 | trace!("get_db_type was called"); 123 | self.db_type.clone() 124 | } 125 | 126 | fn drop_table(&self, table_name: String) -> Result<()> { 127 | trace!("drop_table was called for: [ {} ]", table_name); 128 | // Get a aconnection from the pool. 129 | let connection = self.underlying_pool.get(); 130 | if connection.is_err() { 131 | return Err(ErrorKind::PostgresErr.into()); 132 | } 133 | let connection = connection.unwrap(); 134 | 135 | // Execute drop table statement. 136 | let result = connection.execute(&format!("DROP TABLE IF EXISTS {}", table_name), &[]); 137 | if result.is_err() { 138 | error!("drop_table err"); 139 | error!("{:?}", result.err().unwrap()); 140 | return Err(ErrorKind::PostgresErr.into()); 141 | } else { 142 | trace!("drop_table was successful"); 143 | return Ok(()); 144 | } 145 | } 146 | 147 | fn create_table(&self, table_name: String, columns: BTreeMap) -> Result<()> { 148 | trace!("create_table was called for: [ {} ]", table_name); 149 | // Get a Connection from the underlying DB Connection Pool. 150 | let connection = self.underlying_pool.get(); 151 | if connection.is_err() { 152 | return Err(ErrorKind::PostgresErr.into()); 153 | } 154 | let connection = connection.unwrap(); 155 | 156 | // Create the create table statement. `default` is reseverd word, so replace with 157 | // `_default`. 158 | let mut creation_string = format!("CREATE TABLE IF NOT EXISTS {} (\n", table_name); 159 | for (key, val) in columns.into_iter() { 160 | creation_string += &format!("{} {},\n", key.replace("default", "_default"), val); 161 | } 162 | // Cut off the newline + trailing comma. 163 | let len = creation_string.len(); 164 | creation_string.truncate(len - 2); 165 | // Append final parentheses. 166 | creation_string += ")"; 167 | trace!( 168 | "Using the following creation string: \n {}", 169 | creation_string 170 | ); 171 | 172 | // Execute Create Table Statement. 173 | let result = connection.execute(&creation_string, &[]); 174 | if result.is_err() { 175 | error!("create_table err"); 176 | error!("{:?}", result.err().unwrap()); 177 | return Err(ErrorKind::PostgresErr.into()); 178 | } else { 179 | trace!("create_table was successful!"); 180 | return Ok(()); 181 | } 182 | } 183 | 184 | fn drop_record( 185 | &self, 186 | table_name: String, 187 | column_types: BTreeMap, 188 | column_name: String, 189 | value: String, 190 | ) -> Result<()> { 191 | trace!( 192 | "Drop record was called for table: {} on column: {} with value: {}", 193 | table_name, 194 | column_name, 195 | value 196 | ); 197 | // Get a Connection from the underlying pool. 198 | let connection = self.underlying_pool.get(); 199 | if connection.is_err() { 200 | return Err(ErrorKind::PostgresErr.into()); 201 | } 202 | let connection = connection.unwrap(); 203 | 204 | // Prepare a statemtn for deleting from a table. 
205 | let mut prepared = 206 | format!( 207 | "DELETE FROM {} WHERE {} = ", 208 | table_name, 209 | column_name.clone(), 210 | ); 211 | let the_type = column_types.get(&column_name).unwrap(); 212 | 213 | // Make sure the column gets inserted as the right type to prevent db errors. 214 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 215 | if cast_as == "" { 216 | prepared += &format!("{:?}", value.replace("'", "").replace("\"", "")).replace("\"", "'"); 217 | } else { 218 | prepared += &format!( 219 | "{:?}::{}", 220 | value.replace("'", "").replace("\"", ""), 221 | cast_as 222 | ).replace("\"", "'"); 223 | } 224 | 225 | // Execute the preapred delete statement. 226 | let statement = connection.execute(&prepared, &[]); 227 | if statement.is_err() { 228 | error!("drop_record err"); 229 | error!("{:?}", statement.err().unwrap()); 230 | return Err(ErrorKind::PostgresErr.into()); 231 | } else { 232 | return Ok(()); 233 | } 234 | } 235 | 236 | fn insert_record( 237 | &self, 238 | table_name: String, 239 | column_types: BTreeMap, 240 | columns: BTreeMap>, 241 | ) -> Result<()> { 242 | trace!("insert_record was called for table: {}", table_name); 243 | // Get a connection from the underlying pool. 244 | let connection = self.underlying_pool.get(); 245 | if connection.is_err() { 246 | return Err(ErrorKind::PostgresErr.into()); 247 | } 248 | let connection = connection.unwrap(); 249 | 250 | // Create the insert into statement. 251 | let mut insert_string = format!("INSERT INTO {} (", table_name); 252 | let mut types = BTreeMap::new(); 253 | 254 | // We need to know all the types of the keys for the INSERT INTO () VALUES () 255 | for (pos, key) in columns.keys().enumerate() { 256 | insert_string += &format!("{},", key.replace("default", "_default")); 257 | types.insert(pos, column_types.get(key).unwrap().to_owned()); 258 | } 259 | let mut len = insert_string.len(); 260 | // Remove Trailing Comma. 261 | insert_string.truncate(len - 1); 262 | 263 | // Loop over actual values. 264 | insert_string += ") VALUES ("; 265 | for (pos, val) in columns.values().enumerate() { 266 | // Handle Nulls 267 | if val.is_none() { 268 | insert_string += "NULL,"; 269 | } else { 270 | let the_type = types.get(&pos).unwrap(); 271 | // Cast the value as the right type. 272 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 273 | if cast_as == "" { 274 | insert_string += &format!( 275 | "{:?},", 276 | val.clone().unwrap().replace("'", "").replace("\"", "") 277 | ).replace("\"", "'"); 278 | } else { 279 | insert_string += &format!( 280 | "{:?}::{},", 281 | val.clone().unwrap().replace("'", "").replace("\"", ""), 282 | cast_as 283 | ).replace("\"", "'"); 284 | } 285 | } 286 | } 287 | len = insert_string.len(); 288 | 289 | // Remove Trailing Comma. 290 | insert_string.truncate(len - 1); 291 | insert_string += ")"; 292 | debug!("Insert_record string looks like: \n {}", insert_string); 293 | 294 | // Execute. 
295 | let statement = connection.execute(&insert_string, &[]); 296 | if statement.is_err() { 297 | error!("insert error"); 298 | error!("{:?}", statement.err().unwrap()); 299 | return Err(ErrorKind::PostgresErr.into()); 300 | } else { 301 | return Ok(()); 302 | } 303 | } 304 | } 305 | 306 | 307 | #[cfg(feature = "mysql_compat")] 308 | impl ImportDatabaseAdapter for DatabaseClient { 309 | fn get_db_type(&self) -> DatabaseType { 310 | trace!("get_db_type was called"); 311 | self.db_type.clone() 312 | } 313 | 314 | fn drop_table(&self, table_name: String) -> Result<()> { 315 | trace!("drop_table was called for: [ {} ]", table_name); 316 | 317 | // Get connection from the underlying pool. 318 | let connection = self.underlying_pool.get(); 319 | if connection.is_err() { 320 | return Err(ErrorKind::MysqlErr.into()); 321 | } 322 | let mut connection = connection.unwrap(); 323 | 324 | // Create DropTable statement. 325 | let result = connection.query(&format!("DROP TABLE IF EXISTS {}", table_name)); 326 | if result.is_err() { 327 | error!("drop_table err"); 328 | error!("{:?}", result.err().unwrap()); 329 | return Err(ErrorKind::MysqlErr.into()); 330 | } else { 331 | trace!("drop_table was successful"); 332 | return Ok(()); 333 | } 334 | } 335 | 336 | fn create_table(&self, table_name: String, columns: BTreeMap) -> Result<()> { 337 | trace!("create_table was called for: [ {} ]", table_name); 338 | // Get connection from the underlying pool. 339 | let connection = self.underlying_pool.get(); 340 | if connection.is_err() { 341 | return Err(ErrorKind::MysqlErr.into()); 342 | } 343 | let mut connection = connection.unwrap(); 344 | 345 | // Form Creation String. `default`, and `generated` are reserved words. 346 | let mut creation_string = format!("CREATE TABLE IF NOT EXISTS {} (\n", table_name); 347 | for (key, val) in columns.into_iter() { 348 | creation_string += &format!( 349 | "{} {},\n", 350 | key.replace("default", "_default").replace( 351 | "generated", 352 | "_generated", 353 | ), 354 | val 355 | ); 356 | } 357 | let len = creation_string.len(); 358 | // Remove Trailing newline, and comma. 359 | creation_string.truncate(len - 2); 360 | // Ensure Character set is utf8mb4. 361 | creation_string += ") CHARACTER SET utf8mb4"; 362 | trace!( 363 | "Using the following creation string: \n {}", 364 | creation_string 365 | ); 366 | 367 | // Execute. 368 | let result = connection.query(&creation_string); 369 | if result.is_err() { 370 | error!("create_table err"); 371 | error!("{:?}", result.err().unwrap()); 372 | return Err(ErrorKind::MysqlErr.into()); 373 | } else { 374 | trace!("create_table was successful!"); 375 | return Ok(()); 376 | } 377 | } 378 | 379 | fn drop_record( 380 | &self, 381 | table_name: String, 382 | column_types: BTreeMap, 383 | column_name: String, 384 | value: String, 385 | ) -> Result<()> { 386 | trace!( 387 | "Drop record was called for table: {} on column: {} with value: {}", 388 | table_name, 389 | column_name, 390 | value 391 | ); 392 | // Grab a Connection from the pool. 393 | let connection = self.underlying_pool.get(); 394 | if connection.is_err() { 395 | return Err(ErrorKind::MysqlErr.into()); 396 | } 397 | let mut connection = connection.unwrap(); 398 | 399 | // Start Preparing a Delete from statement. 400 | let mut prepared = 401 | format!( 402 | "DELETE FROM {} WHERE {} = ", 403 | table_name, 404 | column_name.clone(), 405 | ); 406 | let the_type = column_types.get(&column_name).unwrap(); 407 | 408 | // Cast the type correctly. 
409 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 410 | if cast_as == "" { 411 | prepared += &format!("{:?}", value.replace("'", "").replace("\"", "")).replace("\"", "'"); 412 | } else { 413 | prepared += &format!( 414 | "CAST({:?} as {})", 415 | value.replace("'", "").replace("\"", ""), 416 | cast_as 417 | ).replace("\"", "'"); 418 | } 419 | 420 | // Execute. 421 | let statement = connection.query(&prepared); 422 | if statement.is_err() { 423 | error!("drop_record err"); 424 | error!("{:?}", statement.err().unwrap()); 425 | return Err(ErrorKind::MysqlErr.into()); 426 | } else { 427 | return Ok(()); 428 | } 429 | } 430 | 431 | fn insert_record( 432 | &self, 433 | table_name: String, 434 | column_types: BTreeMap, 435 | columns: BTreeMap>, 436 | ) -> Result<()> { 437 | trace!("insert_record was called for table: {}", table_name); 438 | // Get connection from the underlying pool. 439 | let connection = self.underlying_pool.get(); 440 | if connection.is_err() { 441 | return Err(ErrorKind::PostgresErr.into()); 442 | } 443 | let mut connection = connection.unwrap(); 444 | 445 | // Start Preparing insert into statements. 446 | let mut insert_string = format!("INSERT INTO {} (", table_name); 447 | let mut types = BTreeMap::new(); 448 | 449 | // We need the types for INSERT INTO () VALUES (). Get Those. 450 | for (pos, key) in columns.keys().enumerate() { 451 | insert_string += &format!( 452 | "{},", 453 | key.replace("default", "_default").replace( 454 | "generated", 455 | "_generated", 456 | ) 457 | ); 458 | types.insert(pos, column_types.get(key).unwrap().to_owned()); 459 | } 460 | let mut len = insert_string.len(); 461 | // Remove trailing comma. 462 | insert_string.truncate(len - 1); 463 | 464 | // Start Inserting Values. 465 | insert_string += ") VALUES ("; 466 | for (pos, val) in columns.values().enumerate() { 467 | if val.is_none() { 468 | // Handle NULLs. 469 | insert_string += "NULL,"; 470 | } else { 471 | let the_type = types.get(&pos).unwrap(); 472 | // Cast the type correctly. 473 | let cast_as = get_cast_as(the_type.to_owned(), self.db_type.clone()); 474 | if cast_as == "" { 475 | insert_string += &format!( 476 | "{:?},", 477 | val.clone().unwrap().replace("'", "").replace("\"", "") 478 | ).replace("\"", "'"); 479 | } else { 480 | insert_string += &format!( 481 | "CAST({:?} AS {}),", 482 | val.clone().unwrap().replace("'", "").replace("\"", ""), 483 | cast_as 484 | ).replace("\"", "'"); 485 | } 486 | } 487 | } 488 | len = insert_string.len(); 489 | // Remove trailing commas. 490 | insert_string.truncate(len - 1); 491 | insert_string += ")"; 492 | debug!("Insert_record string looks like: \n {}", insert_string); 493 | 494 | // Execute. 495 | let statement = connection.query(&insert_string); 496 | if statement.is_err() { 497 | error!("insert error"); 498 | error!("{:?}", statement.err().unwrap()); 499 | return Err(ErrorKind::MysqlErr.into()); 500 | } else { 501 | return Ok(()); 502 | } 503 | } 504 | } 505 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | //! Provides all errors for the cdl-runner crate. 2 | 3 | use glob; 4 | use reqwest; 5 | use std::io; 6 | 7 | error_chain! 
{ 8 | 9 | errors { 10 | InvalidTypeToConvert(the_type: String) { 11 | description("Cannot convert type to a Database Type!") 12 | display("Invalid Type: [ {} ] to convert to DB", the_type) 13 | } 14 | 15 | PostgresErr { 16 | description("Underlying postgres error!") 17 | display("Underlying postgres error!") 18 | } 19 | 20 | MysqlErr { 21 | description("Underlying Mysql error!") 22 | display("Underlying Mysql error!") 23 | } 24 | 25 | ImportErr { 26 | description("Underlying import errror!") 27 | display("Underlying import error!") 28 | } 29 | } 30 | 31 | foreign_links { 32 | Globerror(glob::PatternError); 33 | HttpError(reqwest::Error); 34 | HttpUrlError(reqwest::UrlError); 35 | Ioerror(io::Error); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/importer.rs: -------------------------------------------------------------------------------- 1 | //! Actually imports the data into a database. 2 | 3 | use api_client::{CanvasDataApiClient, TableDefinition}; 4 | use db_client::ImportDatabaseAdapter; 5 | use errors::*; 6 | use flate2::read::GzDecoder; 7 | use glob::glob; 8 | use rayon::prelude::*; 9 | use std::collections::BTreeMap; 10 | use std::fs::File; 11 | use std::io::prelude::*; 12 | use std::sync::atomic::{AtomicBool, Ordering}; 13 | use type_converter::convert_type_for_db; 14 | 15 | lazy_static! { 16 | /// A list of tables that may not have constant IDs, or 17 | /// single field PKs, and as such need to be dropped/recreated 18 | /// on each import. 19 | static ref VOLATILE_TABLES: Vec = vec![ 20 | "module_completion_requirement_fact".to_owned(), 21 | "module_fact".to_owned(), 22 | "module_item_fact".to_owned(), 23 | "module_prerequisite_fact".to_owned(), 24 | "module_progression_completion_requirement_fact".to_owned(), 25 | "module_progression_fact".to_owned(), 26 | "quiz_fact".to_owned(), 27 | "quiz_question_answer_fact".to_owned(), 28 | "quiz_question_fact".to_owned(), 29 | "quiz_question_group_fact".to_owned(), 30 | "quiz_submission_fact".to_owned(), 31 | "quiz_submission_historical_fact".to_owned(), 32 | "module_completion_requirement_dim".to_owned(), 33 | "module_dim".to_owned(), 34 | "module_item_dim".to_owned(), 35 | "module_prerequisite_dim".to_owned(), 36 | "module_progression_completion_requirement_dim".to_owned(), 37 | "module_progression_dim".to_owned(), 38 | "quiz_dim".to_owned(), 39 | "quiz_question_answer_dim".to_owned(), 40 | "quiz_question_dim".to_owned(), 41 | "quiz_question_group_dim".to_owned(), 42 | "quiz_submission_dim".to_owned(), 43 | "quiz_submission_historical_dim".to_owned(), 44 | "submission_comment_participant_dim".to_owned(), 45 | "requests".to_owned(), 46 | "assignment_override_user_rollup_fact".to_owned(), 47 | "enrollment_rollup_dim".to_owned(), 48 | ]; 49 | } 50 | 51 | /// The Root Importer Object. 52 | pub struct Importer { 53 | /// The Canvas Data API Client. 54 | api_client: CanvasDataApiClient, 55 | /// The Dump ID to process. 56 | dump_id: String, 57 | /// The location of where to save stuff. 58 | save_location: String, 59 | /// The Importing Database Adapter. 60 | db_adapter: T, 61 | } 62 | unsafe impl Send for Importer {} 63 | unsafe impl Sync for Importer {} 64 | 65 | /// A representation of the filenaame. 66 | struct FileNameSplit { 67 | /// The Table name of this file. 68 | pub table_name: String, 69 | /// The part of the internal shard for this file. 70 | pub sharded_part: String, 71 | /// The part of the internal hash for this file. 
72 | pub hash_part: String, 73 | /// The extension for this file. 74 | pub extension: String, 75 | } 76 | 77 | impl FileNameSplit { 78 | /// Split a file name that has been downloaded up into pieces to match on table names, and such easier. 79 | /// 80 | /// * `split_from` - The filename to go ahead, and split. 81 | pub fn new(split_from: String) -> Option { 82 | if split_from.find("-").is_none() { 83 | return None; 84 | } 85 | let as_split: Vec<_> = split_from.split("-").collect(); 86 | if as_split.len() != 3 { 87 | return None; 88 | } 89 | let to_split_part = as_split[2].to_owned(); 90 | let part_with_file_extension: Vec<_> = to_split_part.split(".").collect(); 91 | let hash_part_frd = part_with_file_extension[0].to_owned(); 92 | let extension_frd = part_with_file_extension[1].to_owned(); 93 | 94 | Some(FileNameSplit { 95 | table_name: as_split[0].to_owned(), 96 | sharded_part: as_split[1].to_owned(), 97 | hash_part: hash_part_frd, 98 | extension: extension_frd, 99 | }) 100 | } 101 | } 102 | 103 | impl Importer { 104 | /// Creates a new Importer. 105 | /// 106 | /// * `api_client` - The API Client to use. 107 | /// * `db_adapter` - The Database Adapter to Import Into. 108 | /// * `dump_id` - The Dump ID to import. 109 | /// * `save_location` - The Save location. 110 | pub fn new(api_client: CanvasDataApiClient, db_adapter: T, dump_id: String, save_location: String) -> Self { 111 | Importer { 112 | api_client: api_client, 113 | dump_id: dump_id, 114 | save_location: save_location, 115 | db_adapter: db_adapter, 116 | } 117 | } 118 | 119 | /// Gets the table info from the definition. 120 | /// 121 | /// Gets the table info we need for processing from the definition. Specifically returns the 122 | /// (, ) items. 123 | /// 124 | /// * `table_def` - The Table Definition. 125 | fn get_table_info_from_def(&self, table_def: TableDefinition) -> (Vec, BTreeMap) { 126 | let mut finalized_vec = Vec::new(); 127 | let mut finalized_map = BTreeMap::new(); 128 | 129 | for column in table_def.columns.iter() { 130 | finalized_vec.push(column.name.clone()); 131 | finalized_map.insert( 132 | column.name.clone(), 133 | convert_type_for_db(column.db_type.clone(), self.db_adapter.get_db_type()) 134 | .expect("Failed to Convert Type for DB!"), 135 | ); 136 | } 137 | 138 | (finalized_vec, finalized_map) 139 | } 140 | 141 | /// Gets an "ID" Like column from a list of columns, and a table name. 142 | /// 143 | /// Used to automatically "guess" a primary key for a table since our methods of naming in the schema 144 | /// are mostly deterministic. 145 | /// 146 | /// * `table_name` - the name of the table these columns provide for. 147 | /// * `columns` - A Reference to the list of columns. 148 | fn get_id_like_column_from_columns( 149 | &self, 150 | table_name: String, 151 | columns: &BTreeMap>, 152 | ) -> Option { 153 | debug!("Finding ID Like column for: {}", table_name); 154 | // Check if we have an ID Column. If so, that's what we should use. 155 | if columns.contains_key("id") { 156 | debug!("Has ID Column!"); 157 | return Some("id".to_owned()); 158 | } else { 159 | debug!("Looking up name!"); 160 | // Other tables are labeled like assignment_fact, and have assignment_id. Handle those. 
161 | let find_table_name_potential = table_name.rfind("_"); 162 | if find_table_name_potential.is_some() { 163 | let (the_final_table_name, _) = table_name 164 | .split_at(find_table_name_potential.unwrap()) 165 | .to_owned(); 166 | debug!("Looking up: {}_id", the_final_table_name); 167 | if columns.contains_key(&format!("{}_id", the_final_table_name.clone())) { 168 | debug!("Found per table ID!"); 169 | return Some(format!("{}_id", the_final_table_name)); 170 | } 171 | let find_final_table_name_potential = the_final_table_name.rfind("_"); 172 | if find_final_table_name_potential.is_some() { 173 | let (the_final_table_name_frd, _) = the_final_table_name 174 | .split_at(find_final_table_name_potential.unwrap()) 175 | .to_owned(); 176 | debug!("Looking up: {}_id", the_final_table_name_frd); 177 | if columns.contains_key(&format!("{}_id", the_final_table_name_frd.clone())) { 178 | debug!("Found per table ID!"); 179 | return Some(format!("{}_id", the_final_table_name_frd)); 180 | } 181 | } 182 | } 183 | } 184 | debug!("No ID Found!"); 185 | None 186 | } 187 | 188 | /// Processes a Dump. Aka Imports it. 189 | pub fn process(&self, is_all_volatile: bool) -> Result<()> { 190 | trace!("Process Called for dump: {}", self.dump_id); 191 | 192 | // Download the Files for this dump. 193 | try!(self.api_client.download_files_for_dump( 194 | self.dump_id.clone(), 195 | )); 196 | 197 | // Glob to find downloaded files. 198 | let saved_location_glob = format!("{}/{}/*.gz", &self.save_location, &self.dump_id); 199 | let mut collected: Vec<_> = try!(glob(&saved_location_glob)).collect(); 200 | 201 | // Keep a seperate have failed for our iterator, and the tables we've already dropped. 202 | // Don't want to drop a table multiple times. 203 | let has_failed = AtomicBool::from(false); 204 | 205 | // Drop tables first if first. 206 | collected.iter_mut().map(|entry| { 207 | // If we've already failed, skip. Don't try to keep importing. 208 | if has_failed.load(Ordering::Relaxed) { 209 | trace!("Skipping Entry: {:?} , due to failing", entry); 210 | return; 211 | } 212 | 213 | if let &mut Ok(ref mut path) = entry { 214 | let path_frd = path; 215 | let file_name = path_frd.file_name().unwrap().to_str().unwrap().to_owned(); 216 | let file_name_split = FileNameSplit::new(file_name).unwrap(); 217 | 218 | if VOLATILE_TABLES.contains(&file_name_split.table_name) || is_all_volatile { 219 | let drop_res = self.db_adapter.drop_table(file_name_split.table_name); 220 | if drop_res.is_err() { 221 | error!("process -> is_volatile -> drop_res -> is_err"); 222 | error!("{:?}", drop_res.err().unwrap()); 223 | has_failed.store(true, Ordering::Relaxed); 224 | return; 225 | } 226 | } 227 | } 228 | }).count(); 229 | 230 | let _: Vec<_> = collected 231 | .par_iter_mut() 232 | .map(|entry| { 233 | // If we've already failed, skip. Don't try to keep importing. 234 | if has_failed.load(Ordering::Relaxed) { 235 | trace!("Skipping Entry: {:?} , due to failing", entry); 236 | return; 237 | } 238 | 239 | // If we have a path of a downloaded file. 240 | if let &mut Ok(ref mut path) = entry { 241 | // Get the filename of the downloaded file, and parse it since filenames are determinsitic. 242 | trace!("Got Path"); 243 | let path_frd = path.clone(); 244 | let file_name = path_frd.file_name().unwrap().to_str().unwrap().to_owned(); 245 | let file_name_split = FileNameSplit::new(file_name).unwrap(); 246 | trace!("Post Split!"); 247 | 248 | // Get the table definition for the downloaded table we're looking at. 
249 | let table_def = self.api_client.get_table_definition( 250 | file_name_split.table_name.clone(), 251 | ); 252 | if table_def.is_err() { 253 | error!("process -> table_def -> is_err"); 254 | error!("{:?}", table_def.err().unwrap()); 255 | has_failed.store(true, Ordering::Relaxed); 256 | return; 257 | } 258 | let table_def = table_def.unwrap().unwrap(); 259 | let is_volatile_table = VOLATILE_TABLES.contains(&file_name_split.table_name) || is_all_volatile; 260 | 261 | // Get the columns for our table. 262 | let (column_names, column_defs) = self.get_table_info_from_def(table_def); 263 | trace!("Post Table Def!"); 264 | 265 | // Open up the file for readaing. 266 | let file = File::open(path_frd); 267 | if file.is_err() { 268 | error!("process -> file -> is_err"); 269 | error!("{:?}", file.err().unwrap()); 270 | has_failed.store(true, Ordering::Relaxed); 271 | return; 272 | } 273 | let mut file = file.unwrap(); 274 | trace!("Post File Open"); 275 | 276 | // Read the entire file into a buffer. 277 | // TODO: Maybe oneday switch to a buffered reader? 278 | let mut buffer = Vec::new(); 279 | let res = file.read_to_end(&mut buffer); 280 | if res.is_err() { 281 | error!("process -> res -> is_err"); 282 | error!("{:?}", res.err().unwrap()); 283 | has_failed.store(true, Ordering::Relaxed); 284 | return; 285 | } 286 | trace!("Post Reader"); 287 | 288 | // Uncompress the file. 289 | let mut decoder = GzDecoder::new(buffer.as_slice()); 290 | trace!("Post Decoder Init"); 291 | let mut finalized_string = String::new(); 292 | let decode_res = decoder.read_to_string(&mut finalized_string); 293 | if decode_res.is_err() { 294 | error!("prcoess -> decode_res -> is_err"); 295 | error!("{:?}", decode_res.err().unwrap()); 296 | has_failed.store(true, Ordering::Relaxed); 297 | return; 298 | } 299 | trace!("Post Decode to STR"); 300 | debug!("Decoded String: \n {:?}", finalized_string); 301 | 302 | // Create the table if it doesn't exist. 303 | let create_res = self.db_adapter.create_table( 304 | file_name_split.table_name.clone(), 305 | column_defs.clone(), 306 | ); 307 | if create_res.is_err() { 308 | error!("prcoess -> create_res -> is_err"); 309 | error!("{:?}", create_res.err().unwrap()); 310 | has_failed.store(true, Ordering::Relaxed); 311 | return; 312 | } 313 | trace!("Post create table"); 314 | 315 | // For each line in this file. 316 | for line in finalized_string.lines() { 317 | trace!("Processing line: [ {:?} ]", line); 318 | let mut columns = BTreeMap::new(); 319 | // Split by tabs, gather all columns. 320 | let split_up_tsv_line: Vec<_> = line.split("\t").collect(); 321 | for (pos, name) in column_names.iter().enumerate() { 322 | let mut split_up_line = Some(split_up_tsv_line[pos].to_owned()); 323 | if split_up_line.clone().unwrap().as_str() == "\\N" { 324 | split_up_line = None 325 | } 326 | columns.insert(name.to_owned(), split_up_line); 327 | } 328 | 329 | trace!("Inserting Columns: [ {:?} ]", columns); 330 | 331 | if is_volatile_table { 332 | // If we're volatile don't check if it exists already, just insert. 
333 | trace!("Is volatile table, performing insert"); 334 | let ins_res = self.db_adapter.insert_record( 335 | file_name_split.table_name.clone(), 336 | column_defs.clone(), 337 | columns, 338 | ); 339 | if ins_res.is_err() { 340 | error!("process -> for line in finalized_string -> is_volatile -> ins_res -> is_err"); 341 | error!("{:?}", ins_res.err().unwrap()); 342 | has_failed.store(true, Ordering::Relaxed); 343 | return; 344 | } 345 | } else { 346 | // Perform a diff if we're not volatile. 347 | trace!("Is not volatile performing diff."); 348 | 349 | // Get the ID to diff by. 350 | let id_like_column = self.get_id_like_column_from_columns(file_name_split.table_name.clone(), &columns); 351 | if id_like_column.is_none() { 352 | error!("Failed to find table id like column!"); 353 | has_failed.store(true, Ordering::Relaxed); 354 | return; 355 | } 356 | let id_like_column = id_like_column.unwrap(); 357 | let id_like_value = columns 358 | .get(&id_like_column) 359 | .unwrap() 360 | .clone() 361 | .unwrap() 362 | .to_owned(); 363 | trace!("Performing deletion request for id like column"); 364 | // Send delete request for that ID. on first time seeing this will be no op due to WHERE Clause. 365 | let del_res = self.db_adapter.drop_record( 366 | file_name_split.table_name.clone(), 367 | column_defs.clone(), 368 | id_like_column, 369 | id_like_value, 370 | ); 371 | if del_res.is_err() { 372 | error!("Failed to drop column!"); 373 | has_failed.store(true, Ordering::Relaxed); 374 | return; 375 | } 376 | 377 | // Insert the column to overwrite. 378 | trace!("Performing insert"); 379 | let ins_res = self.db_adapter.insert_record( 380 | file_name_split.table_name.clone(), 381 | column_defs.clone(), 382 | columns, 383 | ); 384 | if ins_res.is_err() { 385 | error!("process -> for line in finalized_string -> !is_volatile -> ins_res -> is_err"); 386 | error!("{:?}", ins_res.err().unwrap()); 387 | has_failed.store(true, Ordering::Relaxed); 388 | return; 389 | } 390 | } 391 | trace!("Imported Line."); 392 | } 393 | } 394 | }) 395 | .collect(); 396 | 397 | debug!("Has Failed: {}", has_failed.load(Ordering::Relaxed)); 398 | 399 | if !has_failed.load(Ordering::Relaxed) { 400 | trace!("Hasn't Failed"); 401 | Ok(()) 402 | } else { 403 | trace!("Has Failed!"); 404 | Err(ErrorKind::ImportErr.into()) 405 | } 406 | } 407 | } 408 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate base64; 2 | extern crate chrono; 3 | extern crate config; 4 | #[macro_use] 5 | extern crate error_chain; 6 | extern crate env_logger; 7 | extern crate flate2; 8 | extern crate futures; 9 | extern crate glob; 10 | #[macro_use] 11 | extern crate lazy_static; 12 | #[macro_use] 13 | extern crate log; 14 | extern crate r2d2; 15 | extern crate rayon; 16 | extern crate regex; 17 | extern crate reqwest; 18 | extern crate ring; 19 | extern crate rocksdb; 20 | extern crate serde; 21 | #[macro_use] 22 | extern crate serde_derive; 23 | extern crate serde_json; 24 | extern crate tokio_core; 25 | 26 | #[cfg(feature = "postgres_compat")] 27 | extern crate postgres; 28 | #[cfg(feature = "postgres_compat")] 29 | extern crate r2d2_postgres; 30 | 31 | #[cfg(feature = "mysql_compat")] 32 | extern crate mysql; 33 | 34 | pub mod api_client; 35 | pub mod db_client; 36 | pub mod errors; 37 | pub mod importer; 38 | pub mod settings; 39 | pub mod type_converter; 40 | 41 | #[cfg(feature = "mysql_compat")] 42 | pub mod 
43 | 
44 | use db_client::DatabaseClient;
45 | use rocksdb::DB;
46 | use settings::DatabaseType;
47 | 
48 | #[cfg(feature = "postgres_compat")]
49 | use r2d2_postgres::PostgresConnectionManager;
50 | 
51 | #[cfg(feature = "mysql_compat")]
52 | use mysql_pool::MysqlConnectionManager;
53 | 
54 | /// Entry Point to the application.
55 | fn main() {
56 |   env_logger::init();
57 | 
58 |   // Initialize Settings.
59 |   let settings = settings::Settings::new();
60 |   let has_errord = false;
61 |   info!("Setting up API Client...");
62 | 
63 |   // Get the dump listing, and set up some variables for iteration.
64 |   let api_client = api_client::CanvasDataApiClient::new(&settings);
65 |   let mut dumps = api_client.get_dumps().expect("Failed to get List of Dumps");
66 |   dumps.sort_by(|dump_one, dump_two| {
67 |     dump_one.created_at.cmp(&dump_two.created_at)
68 |   });
69 |   let dumps_len = dumps.len();
70 |   let only_final_dump = settings.get_should_only_load_final();
71 |   let mut current_dumps_pos = 0;
72 |   debug!("{:?}", dumps);
73 | 
74 |   // Connect to the local KV Store.
75 |   info!("Connecting to RocksDB Store....");
76 |   let whiskey = DB::open_default(settings.get_rocksdb_location()).expect("Failed to open RocksDB");
77 | 
78 |   // Get the latest schema.
79 |   let latest_schema = api_client.get_latest_schema().expect(
80 |     "Failed to fetch latest schema!",
81 |   );
82 |   let mut last_processed_schema = latest_schema.version.clone();
83 |   let last_processed_schema_res = whiskey.get("last_version_processed".as_bytes());
84 |   if let Ok(new_last_processed_schema_opt) = last_processed_schema_res {
85 |     if let Some(new_last_processed_schema_bytes) = new_last_processed_schema_opt {
86 |       if let Some(new_last_processed_schema) = new_last_processed_schema_bytes.to_utf8() {
87 |         last_processed_schema = new_last_processed_schema.to_owned();
88 |       }
89 |     }
90 |   }
91 | 
92 |   let _: Vec<_> = dumps
93 |     .into_iter()
94 |     .map(|dump| {
95 |       // Check if we're only importing the last dump.
96 |       current_dumps_pos = current_dumps_pos + 1;
97 |       if current_dumps_pos != dumps_len && only_final_dump {
98 |         info!("Skipping dump: {} due to only final selected", dump.dump_id);
99 |         return Ok(());
100 |       }
101 | 
102 |       // Check if another dump has failed importing already.
103 |       if has_errord {
104 |         info!(
105 |           "Skipping dump: {} due to previous failure in import",
106 |           dump.dump_id
107 |         );
108 |         return Err(());
109 |       }
110 | 
111 |       // Check if the dump has finished populating.
112 |       debug!("Entering debug loop for dump: {}", dump.dump_id);
113 |       if !dump.finished {
114 |         info!("Skipping dump: {} because it's not finished.", dump.dump_id);
115 |         return Ok(());
116 |       }
117 | 
118 |       // Check if we've already processed this dump.
119 |       let result = whiskey.get(
120 |         format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
121 |       );
122 |       if result.is_err() {
123 |         error!("Failed to get value from Rocks!");
124 |         error!("{:?}", result.err().unwrap());
125 |         return Err(());
126 |       }
127 |       let is_potentially_processed = result.unwrap();
128 |       if is_potentially_processed.is_some() {
129 |         let potentially_processed = is_potentially_processed.unwrap();
130 |         let potentially_processed = potentially_processed.to_utf8();
131 |         if potentially_processed.is_some() {
132 |           let processed = potentially_processed.unwrap();
133 |           if processed == "successful" || processed == "out-of-date" {
134 |             info!("Skipping already processed dump: {}", dump.dump_id);
135 |             return Ok(());
136 |           }
137 |         }
138 |       }
139 | 
140 |       // Check if the dump queued for import is the correct schema version.
141 |       if latest_schema.version != dump.schema_version {
142 |         let _ = whiskey.put(
143 |           format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
144 |           b"out-of-date",
145 |         );
146 |         return Ok(());
147 |       }
148 | 
149 |       // Get the files for this particular dump.
150 |       let files_in_dump = api_client.get_files_for_dump(dump.dump_id.clone());
151 |       if files_in_dump.is_err() {
152 |         info!("Failed to list files for dump. Skipping...");
153 |         return Ok(());
154 |       }
155 |       let files_in_dump = files_in_dump.unwrap();
156 | 
157 |       // Check if the dump is a historical refresh.
158 |       if api_client.is_historical_refresh(files_in_dump) && settings.get_should_skip_historical_imports() {
159 |         info!(
160 |           "Skipping dump: {} since it's a historical refresh",
161 |           dump.dump_id.clone()
162 |         );
163 |         let _ = whiskey.put(
164 |           format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
165 |           b"successful",
166 |         );
167 |         return Ok(());
168 |       }
169 | 
170 |       // Set that we're attempting to import this.
171 |       let _ = whiskey.put(
172 |         format!("dump_processed_{}", dump.dump_id.clone()).as_bytes(),
173 |         b"in_progress",
174 |       );
175 | 
176 |       // If we have postgres compatibility, and are configured for postgres, import that.
177 |       if cfg!(feature = "postgres_compat") {
178 |         if settings.get_database_type() == DatabaseType::Psql {
179 |           info!("Connecting to the DB");
180 |           let db_client = db_client::DatabaseClient::<PostgresConnectionManager>::new(&settings)
181 |             .expect("Couldn't setup DB Client");
182 |           let importer = importer::Importer::<DatabaseClient<PostgresConnectionManager>>::new(
183 |             api_client.clone(),
184 |             db_client,
185 |             dump.dump_id.clone(),
186 |             settings.get_save_location(),
187 |           );
188 |           let res = if last_processed_schema.as_str() != latest_schema.version {
189 |             // If this isn't the latest schema, treat every table as volatile to ensure the tables are up to date.
190 |             importer.process(true)
191 |           } else {
192 |             importer.process(settings.get_all_tables_volatile())
193 |           };
194 |           if res.is_ok() {
195 |             let _ = whiskey.put(
196 |               format!("dump_processed_{}", dump.dump_id).as_bytes(),
197 |               b"successful",
198 |             );
199 |             return Ok(());
200 |           } else {
201 |             let _ = whiskey.put(
202 |               format!("dump_processed_{}", dump.dump_id).as_bytes(),
203 |               b"failure",
204 |             );
205 |             return Err(());
206 |           }
207 |         }
208 |       }
209 | 
210 |       // If we have mysql compatibility, and are configured for mysql, import that.
211 | if cfg!(feature = "mysql_compat") { 212 | if settings.get_database_type() == DatabaseType::Mysql { 213 | info!("Connecting to the DB"); 214 | let db_client = db_client::DatabaseClient::::new(&settings) 215 | .expect("Couldn't setup DB Client"); 216 | let importer = importer::Importer::>::new( 217 | api_client.clone(), 218 | db_client, 219 | dump.dump_id.clone(), 220 | settings.get_save_location(), 221 | ); 222 | let res = importer.process(settings.get_all_tables_volatile()); 223 | if res.is_ok() { 224 | let _ = whiskey.put( 225 | format!("dump_processed_{}", dump.dump_id).as_bytes(), 226 | b"successful", 227 | ); 228 | return Ok(()); 229 | } else { 230 | let _ = whiskey.put( 231 | format!("dump_processed_{}", dump.dump_id).as_bytes(), 232 | b"failure", 233 | ); 234 | return Err(()); 235 | } 236 | } 237 | } 238 | 239 | Err(()) 240 | }) 241 | .collect(); 242 | 243 | let _ = whiskey.put( 244 | "last_version_processed".as_bytes(), 245 | latest_schema.version.as_bytes() 246 | ); 247 | 248 | info!("Done!"); 249 | } 250 | -------------------------------------------------------------------------------- /src/mysql_pool/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pool; 2 | pub use self::pool::{CreateManager, MysqlConnectionManager}; 3 | -------------------------------------------------------------------------------- /src/mysql_pool/pool.rs: -------------------------------------------------------------------------------- 1 | use mysql::error::Error as MysqlError; 2 | use mysql::Conn as MysqlBaseConn; 3 | use mysql::Opts as MysqlOpts; 4 | use mysql::OptsBuilder as MysqlOptsBuilder; 5 | use r2d2::ManageConnection as R2D2ManageConnection; 6 | 7 | #[derive(Clone, Debug)] 8 | pub struct MysqlConnectionManager { 9 | params: MysqlOpts, 10 | } 11 | 12 | pub trait CreateManager { 13 | type Manager; 14 | 15 | fn new(params: T) -> Result; 16 | } 17 | 18 | impl CreateManager for MysqlConnectionManager { 19 | type Manager = MysqlConnectionManager; 20 | 21 | fn new(params: MysqlOptsBuilder) -> Result { 22 | Ok(MysqlConnectionManager { params: MysqlOpts::from(params) }) 23 | } 24 | } 25 | 26 | impl<'a> CreateManager<&'a str> for MysqlConnectionManager { 27 | type Manager = MysqlConnectionManager; 28 | 29 | fn new(params: &'a str) -> Result { 30 | Ok(MysqlConnectionManager { params: MysqlOpts::from(params) }) 31 | } 32 | } 33 | 34 | impl R2D2ManageConnection for MysqlConnectionManager { 35 | type Connection = MysqlBaseConn; 36 | type Error = MysqlError; 37 | 38 | fn connect(&self) -> Result { 39 | MysqlBaseConn::new(self.params.clone()) 40 | } 41 | 42 | fn is_valid(&self, conn: &mut MysqlBaseConn) -> Result<(), MysqlError> { 43 | conn.query("SELECT 1;").map(|_| ()) 44 | } 45 | 46 | fn has_broken(&self, conn: &mut MysqlBaseConn) -> bool { 47 | self.is_valid(conn).is_err() 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/settings.rs: -------------------------------------------------------------------------------- 1 | //! Provides the `Settings` Struct for the rest of the crate in order to get 2 | //! configuration values from the environment, or one of several files. 3 | 4 | use config::{Config, File, Environment}; 5 | 6 | /// An Enum of all possible database types. 7 | /// 8 | /// Contains a list of all possible database types that the loader supports. 9 | #[derive(Clone, Debug, Deserialize, Eq, PartialEq)] 10 | pub enum DatabaseType { 11 | /// A type for postgres-like databases. 
12 |   Psql,
13 |   /// A type for mysql-like databases.
14 |   Mysql,
15 | }
16 | 
17 | /// The Database Configuration object.
18 | ///
19 | /// Handles all database configuration values, which in this case is just the connection URL and the database type.
20 | #[derive(Debug, Deserialize)]
21 | struct Database {
22 |   /// The connection URL for the Database.
23 |   pub url: String,
24 |   /// The Type of The Database.
25 |   pub db_type: String,
26 | }
27 | 
28 | /// The Canvas Data API Auth Configuration object.
29 | ///
30 | /// Handles all the configuration values for the Canvas Data API. In this case just the
31 | /// API key + API secret for Canvas Data.
32 | #[derive(Debug, Deserialize)]
33 | struct Canvasdataauth {
34 |   /// The API Key for Canvas Data.
35 |   pub api_key: String,
36 |   /// The API Secret for Canvas Data.
37 |   pub api_secret: String,
38 | }
39 | 
40 | /// The Global Settings object for all configuration values.
41 | #[derive(Debug, Deserialize)]
42 | pub struct Settings {
43 |   /// The database configuration object.
44 |   database: Database,
45 |   /// The Canvas Data API Auth Configuration Object.
46 |   canvasdataauth: Canvasdataauth,
47 |   /// The place to save files.
48 |   save_location: String,
49 |   /// The place to store the RocksDB database.
50 |   rocksdb_location: String,
51 |   /// Whether or not to skip historical imports.
52 |   skip_historical_imports: bool,
53 |   /// Only attempts to load the latest import.
54 |   only_load_final: Option<bool>,
55 |   /// Treats all tables as volatile.
56 |   all_tables_volatile: Option<bool>,
57 | }
58 | 
59 | impl Settings {
60 |   /// Creates a new settings object.
61 |   pub fn new() -> Self {
62 |     let mut base_configuration = Config::new();
63 |     base_configuration
64 |       .merge(File::with_name("config/default"))
65 |       .expect("Could not find default configuration file");
66 | 
67 |     base_configuration
68 |       .merge(File::with_name("config/local").required(false))
69 |       .expect("Transient error getting local configuration.");
70 | 
71 |     let mut env = Environment::with_prefix("cdl");
72 |     env = env.separator("__");
73 |     base_configuration
74 |       .merge(env)
75 |       .expect("Transient error getting environment variables");
76 | 
77 |     base_configuration.try_into().expect(
78 |       "Failed to create base configuration",
79 |     )
80 |   }
81 | 
82 |   /// Gets the save location provided by the settings.
83 |   pub fn get_save_location(&self) -> String {
84 |     self.save_location.clone()
85 |   }
86 | 
87 |   /// Gets the rocksdb location provided by the settings.
88 |   pub fn get_rocksdb_location(&self) -> String {
89 |     self.rocksdb_location.clone()
90 |   }
91 | 
92 |   /// Gets the notion of whether or not to skip historical imports from the settings.
93 |   pub fn get_should_skip_historical_imports(&self) -> bool {
94 |     self.skip_historical_imports
95 |   }
96 | 
97 |   /// Gets the notion of whether or not to only load the final import.
98 |   pub fn get_should_only_load_final(&self) -> bool {
99 |     self.only_load_final.unwrap_or(false)
100 |   }
101 | 
102 |   /// Gets the notion of whether or not to treat all tables as volatile.
103 |   pub fn get_all_tables_volatile(&self) -> bool {
104 |     self.all_tables_volatile.unwrap_or(false)
105 |   }
106 | 
107 |   /// Gets the database url provided by the settings.
108 |   pub fn get_database_url(&self) -> String {
109 |     self.database.url.clone()
110 |   }
111 | 
112 |   /// Gets the database type provided by the settings.
113 |   pub fn get_database_type(&self) -> DatabaseType {
114 |     match self.database.db_type.to_lowercase().as_str() {
115 |       "mysql" => DatabaseType::Mysql,
116 |       _ => DatabaseType::Psql,
117 |     }
118 |   }
119 | 
120 |   /// Gets the Canvas Data API Key provided by the settings.
121 |   pub fn get_canvas_data_api_key(&self) -> String {
122 |     self.canvasdataauth.api_key.clone()
123 |   }
124 | 
125 |   /// Gets the Canvas Data API Secret provided by the settings.
126 |   pub fn get_canvas_data_api_secret(&self) -> String {
127 |     self.canvasdataauth.api_secret.clone()
128 |   }
129 | }
130 | 
--------------------------------------------------------------------------------
/src/type_converter.rs:
--------------------------------------------------------------------------------
1 | //! Manages type conversion from Canvas Data schema types to database types.
2 | 
3 | use errors::*;
4 | use settings::DatabaseType;
5 | 
6 | /// Converts a type from a name to a FRD Database Type.
7 | ///
8 | /// Takes a type from the Canvas Data Schema API, and turns it into the name of the type
9 | /// for the passed in database.
10 | ///
11 | /// * `orig_type` - The Type passed in from the Canvas Data API.
12 | /// * `db_type` - The Database type to convert into.
13 | pub fn convert_type_for_db(orig_type: String, db_type: DatabaseType) -> Result<String> {
14 |   match orig_type.as_str() {
15 |     "bigint" => Ok("BIGINT".to_owned()),
16 |     "boolean" => {
17 |       match db_type {
18 |         DatabaseType::Psql => Ok("BOOLEAN".to_owned()),
19 |         DatabaseType::Mysql => Ok("VARCHAR(10)".to_owned()),
20 |       }
21 |     }
22 |     "double precision" => {
23 |       match db_type {
24 |         DatabaseType::Psql => Ok("double precision".to_owned()),
25 |         DatabaseType::Mysql => Ok("FLOAT(17)".to_owned()),
26 |       }
27 |     }
28 |     "enum" => Ok("TEXT".to_owned()),
29 |     "int" => Ok("INT".to_owned()),
30 |     "integer" => Ok("INT".to_owned()),
31 |     "text" => {
32 |       match db_type {
33 |         DatabaseType::Psql => Ok("TEXT".to_owned()),
34 |         DatabaseType::Mysql => Ok("LONGTEXT".to_owned()),
35 |       }
36 |     }
37 |     "timestamp" => {
38 |       match db_type {
39 |         DatabaseType::Psql => Ok("TIMESTAMP".to_owned()),
40 |         DatabaseType::Mysql => Ok("DATETIME".to_owned()),
41 |       }
42 |     }
43 |     "date" => Ok("DATE".to_owned()),
44 |     "varchar" => {
45 |       match db_type {
46 |         DatabaseType::Psql => Ok("TEXT".to_owned()),
47 |         DatabaseType::Mysql => Ok("LONGTEXT".to_owned()),
48 |       }
49 |     }
50 |     "guid" => {
51 |       match db_type {
52 |         DatabaseType::Psql => Ok("TEXT".to_owned()),
53 |         DatabaseType::Mysql => Ok("LONGTEXT".to_owned()),
54 |       }
55 |     }
56 |     "datetime" => {
57 |       match db_type {
58 |         DatabaseType::Psql => Ok("TIMESTAMP".to_owned()),
59 |         DatabaseType::Mysql => Ok("DATETIME".to_owned()),
60 |       }
61 |     }
62 |     some_random_value => Err(
63 |       ErrorKind::InvalidTypeToConvert(some_random_value.to_owned()).into(),
64 |     ),
65 |   }
66 | }
67 | 
68 | /// Converts a Database Type into a Cast type.
69 | ///
70 | /// Databases can't auto cast strings as other types. So we need to sometimes manually specify
71 | /// "hey cast this string to another type". This function takes in a type of database (postgres, etc)
72 | /// and the type of the column, and turns it into a cast type, or an empty string.
73 | ///
74 | /// * `orig_type` - The type of the column in the database.
75 | /// * `db_type` - The Type of the Database.
76 | pub fn get_cast_as(orig_type: String, db_type: DatabaseType) -> String {
77 |   match db_type {
78 |     DatabaseType::Psql => {
79 |       match orig_type.to_lowercase().as_str() {
80 |         "bigint" => "int8".to_owned(),
81 |         "boolean" => "boolean".to_owned(),
82 |         "double precision" => "double precision".to_owned(),
83 |         "int" => "int".to_owned(),
84 |         "timestamp" => "timestamp".to_owned(),
85 |         _ => "".to_owned(),
86 |       }
87 |     }
88 |     DatabaseType::Mysql => {
89 |       match orig_type.to_lowercase().as_str() {
90 |         "bigint" => "SIGNED".to_owned(),
91 |         "int" => "SIGNED".to_owned(),
92 |         "float(17)" => "DECIMAL(34, 17)".to_owned(),
93 |         "datetime" => "DATETIME".to_owned(),
94 |         "date" => "DATE".to_owned(),
95 |         _ => "".to_owned(),
96 |       }
97 |     }
98 |   }
99 | }
100 | 
--------------------------------------------------------------------------------
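
For readers extending the loader, the two helpers in `src/type_converter.rs` are meant to be used together: `convert_type_for_db` maps a Canvas Data schema type to a column type for the configured database, and `get_cast_as` supplies the type a raw TSV string should be `CAST` to, or an empty string when no cast is needed. The sketch below shows one way they compose; the `build_create_table` and `cast_expr` helpers and the sample SQL shapes are illustrative assumptions and are not part of the crate itself.

```rust
use errors::*;
use settings::DatabaseType;
use type_converter::{convert_type_for_db, get_cast_as};

/// Illustrative only: build a CREATE TABLE statement from (column, canvas_type) pairs,
/// converting each Canvas Data schema type with the same helper the importer relies on.
fn build_create_table(table: &str, cols: &[(&str, &str)], db: &DatabaseType) -> Result<String> {
  let mut defs = Vec::new();
  for (name, canvas_type) in cols {
    // e.g. "timestamp" becomes TIMESTAMP on Psql and DATETIME on Mysql.
    let db_type = convert_type_for_db(canvas_type.to_string(), db.clone())?;
    defs.push(format!("{} {}", name, db_type));
  }
  Ok(format!("CREATE TABLE IF NOT EXISTS {} ({})", table, defs.join(", ")))
}

/// Illustrative only: wrap a raw TSV value in a CAST when the column type needs one.
fn cast_expr(raw: &str, column_db_type: &str, db: DatabaseType) -> String {
  let cast = get_cast_as(column_db_type.to_string(), db);
  if cast.is_empty() {
    // No cast type registered, so the plain quoted string is used as-is.
    format!("'{}'", raw)
  } else {
    // e.g. CAST('42' AS SIGNED) for a BIGINT column on MySQL.
    format!("CAST('{}' AS {})", raw, cast)
  }
}
```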