├── .styleci.yml ├── .gitignore ├── config └── prologue-big.php ├── .editorconfig ├── src ├── Types.php ├── BigServiceProvider.php └── Big.php ├── LICENSE ├── composer.json ├── CHANGELOG.md └── README.md /.styleci.yml: -------------------------------------------------------------------------------- 1 | preset: psr2 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | node_modules/ 3 | npm-debug.log 4 | /composer.lock 5 | 6 | # Laravel 5 & Lumen specific 7 | public/storage 8 | public/hot 9 | storage/*.key 10 | .env.*.php 11 | .env.php 12 | .env 13 | Homestead.yaml 14 | Homestead.json 15 | -------------------------------------------------------------------------------- /config/prologue-big.php: -------------------------------------------------------------------------------- 1 | [ 5 | 'auth_file' => env('BIG_AUTH_FILE', null), 6 | 'project_id' => env('BIG_PROJECT_ID'), 7 | 'default_dataset' => env('BIG_DEFAULT_DATASET'), 8 | ], 9 | ]; 10 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | ; This file is for unifying the coding style for different editors and IDEs. 
2 | ; More information at http://editorconfig.org 3 | 4 | root = true 5 | 6 | [*] 7 | charset = utf-8 8 | indent_size = 4 9 | indent_style = space 10 | end_of_line = lf 11 | insert_final_newline = true 12 | trim_trailing_whitespace = true 13 | 14 | [*.md] 15 | trim_trailing_whitespace = false 16 | -------------------------------------------------------------------------------- /src/Types.php: -------------------------------------------------------------------------------- 1 | publishes([ 25 | $configPath => config_path('prologue-big.php'), 26 | ], 'config'); 27 | } 28 | 29 | /** 30 | * Merge the package's default config under the 'prologue-big' key and bind Big as a container singleton 31 | * 32 | * @return void 33 | */ 34 | public function register() 35 | { 36 | $this->mergeConfigFrom(__DIR__ . '/../config/prologue-big.php', 'prologue-big'); 37 | 38 | $this->app->singleton(Big::class, function ($app) { 39 | return new Big(); 40 | }); 41 | } 42 | 43 | /** 44 | * Get the services provided by the provider. 45 | * 46 | * @return array 47 | */ 48 | public function provides() 49 | { 50 | return [Big::class]; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "prologuetech/big", 3 | "type": "library", 4 | "description": "Google BigQuery for Laravel", 5 | "keywords": [ 6 | "Prologue Technology", 7 | "Big", 8 | "Google", 9 | "BigQuery", 10 | "Laravel", 11 | "PHP" 12 | ], 13 | "homepage": "https://github.com/prologuetech/laravel-big", 14 | "license": "MIT", 15 | "authors": [ 16 | { 17 | "name": "Eric Percifield", 18 | "email": "epercifield@prologuetechnology.com", 19 | "homepage": "http://prologuetechnology.com/", 20 | "role": "Developer" 21 | } 22 | ], 23 | "require": { 24 | "illuminate/support": "^5.4", 25 | "illuminate/database": "^5.4", 26 | "illuminate/cache": "^5.4", 27 | "php" : "~7.0", 28 | "google/cloud": ">=0.49 <=0.60" 29 | }, 30 | "autoload": { 31
| "psr-4": { 32 | "Prologuetech\\Big\\": "src" 33 | } 34 | }, 35 | "config": { 36 | "sort-packages": true, 37 | "minimum-stability": "dev", 38 | "prefer-stable": true 39 | }, 40 | "extra": { 41 | "laravel": { 42 | "providers": [ 43 | "Prologuetech\\Big\\BigServiceProvider" 44 | ], 45 | "aliases": { 46 | "Big": "Prologuetech\\Big\\BigServiceProvider" 47 | } 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 6 | 7 | ## [1.0.1] - 2018-8-16 8 | ### Changed 9 | - Fixed Job Configuration options 10 | 11 | ## [1.0.0] - 2018-4-19 12 | ### Added 13 | - Added nullable/required field modes. 14 | - Added support for Google's ADC. 15 | 16 | ### Changed 17 | - Fix company name typo. 18 | - Fixed `vendor:publish` command. 19 | - Google Cloud SDK 0.49 -> 0.60. 20 | 21 | ### Removed 22 | - Removed old composer requirements. 23 | 24 | ## [0.1.2] - 2018-4-06 25 | ### Added 26 | - Added double support for BQ types. 27 | - Added get max ID/date helpers. 28 | - Added STRUCT support for JSON type fields. 29 | - Added Laravels auto discovery support. 30 | - Added ability to specify a default dataset - config('prologue-big.big.default_dataset'). 31 | 32 | ### Changed 33 | - Google Cloud v0.32.1 -> v0.49. 34 | 35 | ## [0.1.1] - 2017-9-15 36 | ### Added 37 | - Added ```BIG_DEFAULT_DATASET``` env option. 38 | - Added optional delay to ```Big::createFromModel()``` to allow BigQuery time to create. 39 | - Added STRUCT support for JSON type fields. 
40 | 41 | ### Changed 42 | - We now use [BigQuery's insertId](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) if we have an ID column. 43 | - Insert now returns true on success. 44 | - We now pass errors onward to implementation. 45 | 46 | ### Fixed 47 | - Fixed TIMESTAMP type. 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google BigQuery for Laravel 2 | 3 | This package aims to wrap laravel functionality around Google's BigQuery. 4 | 5 | ## Install 6 | 7 | Via Composer 8 | 9 | ``` bash 10 | $ composer require prologuetech/big 11 | ``` 12 | 13 | ## Setup 14 | Publish our config file into your application: 15 | 16 | ``` bash 17 | php artisan vendor:publish --provider="Prologuetech\Big\BigServiceProvider" 18 | ``` 19 | 20 | You should have a `config/prologue-big.php` file to configure defaults. 21 | 22 | ### Laravel 5.4.x 23 | Older versions of Laravel require you to add our big service provider to your application providers array in `config/app.php`: 24 | 25 | ``` php 26 | Prologuetech\Big\BigServiceProvider::class, 27 | ``` 28 | 29 | You now have access to a familiar laravel experience, enjoy! 30 | 31 | ## Google Authentication 32 | The Google SDK supports Application Default Credentials (ADC) and thus this package does as well. You may leave your `auth_file` field inside of your config file `null` to use ADC. Credentials fetcher is not currently supported but may be added in the future. 33 | 34 | For more information see the [adc docs](https://cloud.google.com/docs/authentication/production#auth-cloud-implicit-php). 35 | 36 | ## How to use 37 | 38 | ### Configuration 39 | 40 | By default we use the following global config options with BigQuery. 
41 | 42 | ```php 43 | $this->options = [ 44 | 'useLegacySql' => false, 45 | 'useQueryCache' => false, 46 | ]; 47 | ``` 48 | 49 | ### Tables 50 | 51 | When creating tables in BQ we automatically flip an Eloquent model schema for you. Let's cover an example of archiving data 52 | from our events table into BQ using Laravel's chunk method. 53 | 54 | ```php 55 | $datasetId = 'test'; 56 | $tableId = 'events'; 57 | 58 | // Create our BQ helper 59 | $big = new Big(); 60 | 61 | // Create table, we will pass in a mocked model to mutate into BQ schema 62 | // Note: create table will only make a new table if it does not exist 63 | 64 | /** @var Google\Cloud\BigQuery\Table $table */ 65 | $table = $big->createFromModel($datasetId, $tableId, new Event()); 66 | 67 | // Let's stream our events into BQ in large chunks 68 | // Note: notArchived() is a simple scope, use whatever scopes you have on your model 69 | Event::notArchived()->chunk(1000, function ($events) use ($big, $table) { 70 | // Prepare our rows 71 | $rows = $big->prepareData($events); 72 | 73 | // Stream into BQ, you may also pass in any options with a 3rd param. 74 | // Note: By default we use: 'ignoreUnknownValues' => true 75 | $big->insert($table, $rows); 76 | 77 | // Get our current ids 78 | /** @var Illuminate\Support\Collection $events */ 79 | $ids = $events->pluck('id')->toArray(); 80 | 81 | // Update these events as processed 82 | Event::whereIn('id', $ids)->update([ 83 | 'system_processed' => 1 84 | ]); 85 | }); 86 | ``` 87 | 88 | That's it! You now have a replica of your events table in BigQuery, enjoy! 89 | 90 | ### Queries 91 | 92 | Instantiating ```Big``` will automatically set up a Google ServiceBuilder and give us direct access to ```BigQuery``` through 93 | our internals via ```$big->query```. However there are many helpers built into Big that make interacting with BigQuery a 94 | piece of cake (or a tasty carrot if you're into that kind of thing).
95 | 96 | For example when running a query on BigQuery we must use the reload method in a loop to poll results. Big comes with a 97 | useful method ```run``` so all you need to do is this: 98 | 99 | ``` php 100 | $query = 'SELECT count(id) FROM test.events'; 101 | 102 | $big = new Big(); 103 | $results = $big->run($query); 104 | ``` 105 | 106 | When using ```run``` we automatically poll BigQuery and return all results as a laravel collection object for you so you 107 | can enjoy your results as a refreshing cup of Laravel. 108 | 109 | ## Change log 110 | 111 | Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 112 | 113 | ## License 114 | 115 | The MIT License (MIT). Please see [License File](LICENSE.md) for more information. 116 | -------------------------------------------------------------------------------- /src/Big.php: -------------------------------------------------------------------------------- 1 | options = [ 38 | 'useLegacySql' => false, 39 | 'useQueryCache' => false, 40 | ]; 41 | 42 | // Build our Google config options 43 | $config = [ 44 | 'projectId' => config('prologue-big.big.project_id'), 45 | ]; 46 | 47 | // Allow Google's default application credentials if developer chooses 48 | if (!is_null(config('prologue-big.big.auth_file'))) { 49 | $config['keyFilePath'] = config('prologue-big.big.auth_file'); 50 | } 51 | 52 | // Setup google service with credentials 53 | $googleService = new ServiceBuilder($config); 54 | 55 | // Set a default dataset 56 | $this->defaultDataset = config('prologue-big.big.default_dataset'); 57 | 58 | // Return our instance of BigQuery 59 | $this->query = $googleService->bigQuery(); 60 | } 61 |
    /**
     * Run a query on BigQuery, wait for it to finish and return every row.
     *
     * The query results are reloaded every half second until BigQuery reports
     * them complete, then the rows are collected into a Laravel collection.
     *
     * @param string     $query   SQL text passed to the BigQuery client
     * @param array|null $options Query options; $this->options is used when null
     *
     * @return \Illuminate\Support\Collection
     */
    public function run($query, $options = null)
    {
        // Set default options if nothing is passed in
        $options = $options ?? $this->options;

        $queryResults = $this->query->runQuery($this->query->query($query, $options));

        // Poll until complete. usleep() is required here: sleep() only takes whole
        // seconds, so sleep(.5) is truncated to sleep(0) and the loop never pauses.
        while (!$queryResults->isComplete()) {
            usleep(500000); // let's wait for half a second...
            $queryResults->reload(); // trigger a network request
        }

        // Mutate into a laravel collection
        $data = [];
        foreach ($queryResults->rows() as $row) {
            $data[] = $row;
        }

        return collect($data);
    }

    /**
     * Wrap around Google's BigQuery insert method
     *
     * @param Table      $table
     * @param array      $rows    Rows to stream, e.g. the output of prepareData()
     * @param array|null $options Insert options; ['ignoreUnknownValues' => true] when null
     * @param bool|null  $verbose When truthy, always return a report array instead of true/errors
     *
     * @return bool|array true on success when not verbose; a flat list of errors on
     *                    failure when not verbose; when verbose an array with the keys
     *                    'affected_rows', 'errors' and 'info'
     * @throws \Exception
     */
    public function insert($table, $rows, $options = null, $verbose = null)
    {
        // Set default options if nothing is passed in
        $options = $options ?? ['ignoreUnknownValues' => true];

        $insertResponse = $table->insertRows($rows, $options);

        if ($insertResponse->isSuccessful() && !$verbose) {
            return true;
        }

        // Flatten the per-row error lists into a single list
        $failedRows = $insertResponse->failedRows();
        $errors = [];
        foreach ($failedRows as $row) {
            foreach ($row['errors'] as $error) {
                $errors[] = $error;
            }
        }

        if (!$verbose) {
            return $errors;
        }

        // A failed row may carry several errors, so count failed rows (not errors)
        // when working out how many rows were affected.
        return [
            'affected_rows' => count($rows) - count($failedRows),
            'errors' => $errors,
            'info' => $insertResponse->info(),
        ];
    }

    /**
     * Look up a table by its id within a dataset.
     *
     * Lists the dataset's tables and returns the first one whose id matches.
     *
     * @param string      $tableName
     * @param string|null $dataset   Dataset id; the configured default dataset when null
     *
     * @return Table|null The matching table, or null when none is found
     * @throws Exception
     */
    public function getTable($tableName, $dataset = null)
    {
        // Defaults
        $dataset = $dataset ?? $this->defaultDataset;

        $tables = $this->query->dataset($dataset)->tables();

        /** @var Table $table */
        foreach ($tables as $table) {
            if ($table->id() == $tableName) {
                return $table;
            }
        }

        return null;
    }

    /**
     * Convert models/arrays into the row format expected by insert().
     *
     * Each row always has a 'data' key. An 'insertId' key is added when the item
     * has an 'id', and a 'fields' key is added only when the item contains nested
     * arrays (described as STRUCT name/type pairs).
     *
     * @param \Illuminate\Database\Eloquent\Collection|\Illuminate\Support\Collection|array $data
     *
     * @return array
     */
    public function prepareData($data)
    {
        $preparedData = [];

        // We loop our data and handle object conversion to an array
        foreach ($data as $item) {
            if (!is_array($item)) {
                $item = $item->toArray();
            }

            // Handle nested arrays as STRUCT<>: map each nested key to a name/type pair
            $struct = [];
            foreach ($item as $value) {
                if (!is_array($value)) {
                    continue;
                }

                foreach ($value as $key => $attr) {
                    $struct[] = [
                        'name' => $key,
                        'type' => strtoupper(gettype($attr)),
                    ];
                }
            }

            // If we have an id column use Google's insertId
            // https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency
            if (array_key_exists('id', $item)) {
                $rowData = [
                    'insertId' => $item['id'],
                    'data' => $item,
                ];
            } else {
                $rowData = ['data' => $item];
            }

            // Set our struct definition only if we have one, regardless of the id column
            if (!empty($struct)) {
                $rowData['fields'] = $struct;
            }

            $preparedData[] = $rowData;
        }

        return $preparedData;
    }

    /**
     * Create a BigQuery table whose schema is derived from an Eloquent model.
     *
     * If the table already exists it is returned untouched. Otherwise the model is
     * flipped into a schema of the following shape and the table is created:
     *
     * $fields = [
     *     [
     *         'name' => 'field1',
     *         'type' => 'string',
     *         'mode' => 'required'
     *     ],
     *     [
     *         'name' => 'field2',
     *         'type' => 'integer'
     *     ],
     * ];
     * $schema = ['fields' => $fields];
     *
     * @param string     $datasetId
     * @param string     $tableId
     * @param Model      $model
     * @param array|null $structs  STRUCT definitions for JSON columns, keyed by column name
     * @param bool       $useDelay When true, sleep 10 seconds after creating the table
     *
     * @throws Exception
     * @return Table|null
     */
    public function createFromModel($datasetId, $tableId, $model, $structs = null, $useDelay = true)
    {
        // If this table has already been created, return it
        $table = $this->getTable($tableId, $datasetId);

        if ($table instanceof Table) {
            return $table;
        }

        // Flip our Eloquent model into a BigQuery schema map
        $options = ['schema' => static::flipModel($model, $structs)];

        // Create the table
        $table = $this->query->dataset($datasetId)->createTable($tableId, $options);

        // New tables are not instantly available, we will insert a delay to help the developer
        if ($useDelay) {
            sleep(10);
        }

        return $table;
    }

    /**
     * Flip a Laravel Eloquent model into a BigQuery schema.
     *
     * The column list comes from a `describe <table>` query and is cached for
     * five days under a key derived from the model's table name.
     *
     * @param Model      $model
     * @param array|null $structs STRUCT definitions for JSON columns, keyed by column name
     *
     * @throws Exception When $model is not an Eloquent model
     * @return array ['fields' => [...]]
     */
    public static function flipModel($model, $structs)
    {
        // Verify we have an Eloquent Model
        if (!$model instanceof Model) {
            // get_class() only accepts objects; fall back to gettype() for scalars/null
            $given = is_object($model) ? get_class($model) : gettype($model);

            throw new Exception(__METHOD__ . ' requires a Eloquent model, ' . $given . ' used.');
        }

        // Cache name based on table
        $cacheName = __CLASS__ . '.cache.' . $model->getTable();

        // Cache duration
        $liveFor = Carbon::now()->addDays(5);

        // Cache our results as these rarely change
        $fields = Cache::remember($cacheName, $liveFor, function () use ($model) {
            return DB::select('describe ' . $model->getTable());
        });

        // Loop our fields and return a Google BigQuery field map array
        return ['fields' => static::fieldMap($fields, $structs)];
    }

    /**
     * Map our fields to BigQuery compatible data types
     *
     * Each entry of $fields is read through its Type, Field and Null properties.
     * JSON columns become STRUCT fields and are skipped when $structs holds no
     * definition for them; unrecognised types fall back to STRING.
     *
     * @param array      $fields
     * @param array|null $structs STRUCT definitions for JSON columns, keyed by column name
     *
     * @return array
     */
    public static function fieldMap($fields, $structs)
    {
        // Holders
        $map = [];

        // Loop our fields and map them
        foreach ($fields as $value) {
            // Compute short name for matching type (drop anything from the first "(")
            $shortType = trim(explode('(', $value->Type)[0]);

            // Reset per field so a struct never leaks onto the next column
            $struct = null;

            switch ($shortType) {
                case Types::TIMESTAMP:
                    $type = 'TIMESTAMP';
                    break;
                case Types::INT:
                case Types::TINYINT:
                case Types::BIGINT:
                case Types::INTEGER:
                case Types::SMALLINT:
                    $type = 'INTEGER';
                    break;
                case Types::BOOLEAN:
                    $type = 'BOOLEAN';
                    break;
                case Types::DATE:
                case Types::DATETIME:
                    $type = 'DATETIME';
                    break;
                case Types::DECIMAL:
                case Types::FLOAT:
                case Types::DOUBLE:
                    $type = 'FLOAT';
                    break;
                case Types::TIME:
                    $type = 'TIME';
                    break;
                case Types::JSON:
                    // JSON data-types require a struct to be defined. Skip the column
                    // unless the developer supplied a hint for this specific field.
                    if (empty($structs[$value->Field])) {
                        continue 2;
                    }

                    $struct = $structs[$value->Field];
                    $type = 'STRUCT';
                    break;
                default:
                    $type = 'STRING';
                    break;
            }

            // Nullable handler
            $mode = (strtolower($value->Null) === 'yes' ? 'NULLABLE' : 'REQUIRED');

            // Construct our BQ schema data
            $fieldData = [
                'name' => $value->Field,
                'type' => $type,
                'mode' => $mode,
            ];

            // Set our struct definition if we have one
            if (!empty($struct)) {
                $fieldData['fields'] = $struct;
            }

            $map[] = $fieldData;
        }

        // Return our map
        return $map;
    }

    /**
     * Return the max ID
     *
     * @param string      $table
     * @param string|null $dataset Dataset id; the configured default dataset when null
     *
     * @return mixed
     */
    public function getMaxId($table, $dataset = null)
    {
        return $this->getMaxField($table, 'id', $dataset);
    }

    /**
     * Return the max created_at date
     *
     * @param string      $table
     * @param string|null $dataset Dataset id; the configured default dataset when null
     *
     * @return mixed
     */
    public function getMaxCreationDate($table, $dataset = null)
    {
        return $this->getMaxField($table, 'created_at', $dataset);
    }

    /**
     * Return the max of field
     *
     * Note: $table, $field and $dataset are concatenated into the SQL text as-is,
     * so they must come from trusted input.
     *
     * @param string      $table
     * @param string      $field
     * @param string|null $dataset Dataset id; the configured default dataset when null
     *
     * @return mixed The max value, or null when the query yields no row
     */
    public function getMaxField($table, $field, $dataset = null)
    {
        // Defaults
        $dataset = $dataset ?? $this->defaultDataset;

        // Run our max query
        $results = $this->run('SELECT max(' . $field . ') ' . $field . ' FROM `' . $dataset . '.' . $table . '`');

        // first() is null on an empty collection; avoid offset access on null
        $row = $results->first();

        return $row[$field] ?? null;
    }
}
--------------------------------------------------------------------------------