├── Chapter0 ├── .gitkeep ├── R_SQL-Server_Connect.ipynb └── RxSqlServerData_sqlQuery_quoting_quirks.ipynb ├── README.md ├── RServices_Examples ├── readme ├── Data_Science_with_SQL_Server_2016_asis.ipynb └── Data_Science_with_SQL_Server_2016_could_be.ipynb ├── R_Hospital_LoS.ipynb ├── MSFT_dplyr02_Medicaid.ipynb ├── Data_Science_with_SQL_Server_2016_asis.ipynb └── MSFT_dplyr01.ipynb /Chapter0/.gitkeep: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RforSASUsers 2 | -------------------------------------------------------------------------------- /RServices_Examples/readme: -------------------------------------------------------------------------------- 1 | Empty README File 2 | -------------------------------------------------------------------------------- /R_Hospital_LoS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Length of Hospital Stay\n", 8 | "\n", 9 | "from:\n", 10 | "https://github.com/Microsoft/r-server-hospital-length-of-stay/blob/master/R/Hospital_Length_Of_Stay_Notebook.ipynb\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "collapsed": true 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# Load packages.\n", 22 | "library(RevoScaleR)\n", 23 | "library(\"MicrosoftML\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "db <- \"HospitalR\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "#connect_str <- 
\"Driver={ODBC Driver 13 for SQL Server};Server=TRB_MICROSOFT;\n", 46 | "# Database=AdventureWorks;Trusted_Connection=yes\"" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "# Uses the SQL/Server Native Client 11.0 library DSN\n", 58 | "# This is the same library which the WPS example successfully uses to connect with at:\n", 59 | "# https://notebooks.azure.com/n/c11muvnt15w/notebooks/WPS_SQL_Server.ipynb\n", 60 | "\n", 61 | "connect_str <- \"Driver={SQL Server Native Client 11.0};Server=localhost;\n", 62 | " Database=master;uid=REDMOND\\v-thbeta Pwd=XXXXXXXXX\"" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "outOdbcDS <- RxOdbcData(table=\"NewData\", connectionString = connect_str, useFastRead=TRUE)\n", 74 | "rxOpen(outOdbcDS, \"w\")" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/html": [ 87 | "FALSE" 88 | ], 89 | "text/latex": [ 90 | "FALSE" 91 | ], 92 | "text/markdown": [ 93 | "FALSE" 94 | ], 95 | "text/plain": [ 96 | "[1] FALSE" 97 | ] 98 | }, 99 | "metadata": {}, 100 | "output_type": "display_data" 101 | } 102 | ], 103 | "source": [ 104 | "query <- sprintf( \"if not exists(SELECT * FROM sys.databases WHERE name = '%s') CREATE DATABASE %s;\", db, db)\n", 105 | "\n", 106 | "## Create database. \n", 107 | "rxExecuteSQLDDL(outOdbcDS, sSQLString = query)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "# Create database. 
\n", 119 | "rxExecuteSQLDDL(outOdbcDS, sSQLString = query)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# Uses the ODBC Driver 13 for SQL Server User DSN\n", 131 | "connect_str1 <- \"Driver={ODBC Driver 13 for SQL Server};Server=localhost;\n", 132 | " Database=master;uid=REDMOND\\v-thbeta Pwd=Porsche#1\"" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "outOdbcDS <- RxOdbcData(table=\"NewData\", connectionString = connect_str1, useFastRead=TRUE)\n", 144 | "rxOpen(outOdbcDS, \"w\")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 3, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "ename": "ERROR", 156 | "evalue": "Error in paste(\"Driver= {SQL Server Native Client 11.0};Server=\", instance_name, : object 'instance_name' not found\n", 157 | "output_type": "error", 158 | "traceback": [ 159 | "Error in paste(\"Driver= {SQL Server Native Client 11.0};Server=\", instance_name, : object 'instance_name' not found\nTraceback:\n", 160 | "1. paste(\"Driver= {SQL Server Native Client 11.0};Server=\", instance_name, \n . \";Database=\", database_name, \";Trusted_Connection=yes;\", \n . 
sep = \"\")" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "connStr <- paste(\"Driver= {SQL Server Native Client 11.0};Server=\",instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "R 3.3", 181 | "language": "R", 182 | "name": "ir33" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": "r", 186 | "file_extension": ".r", 187 | "mimetype": "text/x-r-source", 188 | "name": "R", 189 | "pygments_lexer": "r", 190 | "version": "3.3.2" 191 | } 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 2 195 | } 196 | -------------------------------------------------------------------------------- /Chapter0/R_SQL-Server_Connect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Generate a RxSqlServerData data source object\n", 8 | "\n", 9 | "https://msdn.microsoft.com/en-us/microsoft-r/scaler/rxsqlserverdata#example" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "library(RevoScaleR)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### using the doc's example for Windows integrated authentication you would build:" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "#connStr\n", 39 | "instance_name <- \"TRB_MICROSOFT\"\n", 40 | "database_name <- \"WideWorldImportersDW\"\n", 41 | "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", 42 | " 
instance_name, \";Database=\",database_name,\";Trusted_Connection=true;\",sep=\"\");" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "'Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=true;'" 56 | ], 57 | "text/latex": [ 58 | "'Driver=\\{SQL Server Native Client 11.0\\};Server=TRB\\_MICROSOFT;Database=WideWorldImportersDW;Trusted\\_Connection=true;'" 59 | ], 60 | "text/markdown": [ 61 | "'Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=true;'" 62 | ], 63 | "text/plain": [ 64 | "[1] \"Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=true;\"" 65 | ] 66 | }, 67 | "metadata": {}, 68 | "output_type": "display_data" 69 | } 70 | ], 71 | "source": [ 72 | "connStr" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "\n", 87 | "[Microsoft][SQL Server Native Client 11.0]Invalid value specified for connection string attribute 'Trusted_Connection'\n", 88 | "\n", 89 | "\n", 90 | "\n", 91 | "[Microsoft][ODBC Driver Manager] Connection not open\n", 92 | "\n", 93 | "\n", 94 | "\n", 95 | "ODBC Error in SQLDisconnect\n" 96 | ] 97 | }, 98 | { 99 | "ename": "ERROR", 100 | "evalue": "Error in doTryCatch(return(expr), name, parentenv, handler): [Microsoft][SQL Server Native Client 11.0]Invalid value specified for connection string attribute 'Trusted_Connection'\n\n\n", 101 | "output_type": "error", 102 | "traceback": [ 103 | "Error in doTryCatch(return(expr), name, parentenv, handler): [Microsoft][SQL Server Native Client 11.0]Invalid value specified for connection string attribute 
'Trusted_Connection'\n\n\nTraceback:\n", 104 | "1. rxOpen(outOdbcDS, \"w\")", 105 | "2. rxOpen(outOdbcDS, \"w\")", 106 | "3. rxCall(\"RxOpenDataSource\", params)", 107 | "4. tryCatch(.Call(sym, ..., PACKAGE = PACKAGE), interrupt = function(x) {\n . .C(\"RxUserBreak\", ..., PACKAGE = PACKAGE)\n . stop(\"RevoScaleR function interrupted\", call. = FALSE)\n . })", 108 | "5. tryCatchList(expr, classes, parentenv, handlers)", 109 | "6. tryCatchOne(expr, names, parentenv, handlers[[1L]])", 110 | "7. doTryCatch(return(expr), name, parentenv, handler)" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", 116 | "\n", 117 | "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString = connStr, useFastRead=TRUE)\n", 118 | "rxOpen(outOdbcDS, \"w\")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "source": [ 127 | "#### the correct syntax for Windows integrated authentication in the example should be 'trusted_Connection=yes'" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 5, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "#connStr\n", 139 | "instance_name <- \"TRB_MICROSOFT\"\n", 140 | "database_name <- \"WideWorldImportersDW\"\n", 141 | "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", 142 | " instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/html": [ 155 | "'Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;'" 156 | ], 157 | "text/latex": [ 158 | "'Driver=\\{SQL Server Native Client 
11.0\\};Server=TRB\\_MICROSOFT;Database=WideWorldImportersDW;Trusted\\_Connection=yes;'" 159 | ], 160 | "text/markdown": [ 161 | "'Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;'" 162 | ], 163 | "text/plain": [ 164 | "[1] \"Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;\"" 165 | ] 166 | }, 167 | "metadata": {}, 168 | "output_type": "display_data" 169 | } 170 | ], 171 | "source": [ 172 | "connStr" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/html": [ 185 | "TRUE" 186 | ], 187 | "text/latex": [ 188 | "TRUE" 189 | ], 190 | "text/markdown": [ 191 | "TRUE" 192 | ], 193 | "text/plain": [ 194 | "[1] TRUE" 195 | ] 196 | }, 197 | "metadata": {}, 198 | "output_type": "display_data" 199 | } 200 | ], 201 | "source": [ 202 | "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", 203 | "\n", 204 | "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString = connStr, useFastRead=TRUE)\n", 205 | "rxOpen(outOdbcDS, \"w\")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "R 3.3", 221 | "language": "R", 222 | "name": "ir33" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": "r", 226 | "file_extension": ".r", 227 | "mimetype": "text/x-r-source", 228 | "name": "R", 229 | "pygments_lexer": "r", 230 | "version": "3.3.2" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | -------------------------------------------------------------------------------- /MSFT_dplyr02_Medicaid.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# From Steve Miller's post at\n", 12 | "# http://www.dataversity.net/frequencies-r-part-1/\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 13, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stderr", 24 | "output_type": "stream", 25 | "text": [ 26 | "Installing package into 'C:/Users/v-thbeta/Documents/R/win-library/3.3'\n", 27 | "(as 'lib' is unspecified)\n" 28 | ] 29 | }, 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "package 'dtplyr' successfully unpacked and MD5 sums checked\n", 35 | "\n", 36 | "The downloaded binary packages are in\n", 37 | "\tC:\\Users\\v-thbeta\\AppData\\Local\\Temp\\RtmpeUeyLf\\downloaded_packages\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "install.packages(\"dtplyr\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 11, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "name": "stderr", 54 | "output_type": "stream", 55 | "text": [ 56 | "Installing package into 'C:/Users/v-thbeta/Documents/R/win-library/3.3'\n", 57 | "(as 'lib' is unspecified)\n" 58 | ] 59 | }, 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "package 'readr' successfully unpacked and MD5 sums checked\n", 65 | "\n", 66 | "The downloaded binary packages are in\n", 67 | "\tC:\\Users\\v-thbeta\\AppData\\Local\\Temp\\RtmpeUeyLf\\downloaded_packages\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "install.packages(\"readr\")" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 9, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [ 82 | { 83 | "name": "stderr", 84 | "output_type": "stream", 85 | "text": [ 86 
| "Installing package into 'C:/Users/v-thbeta/Documents/R/win-library/3.3'\n", 87 | "(as 'lib' is unspecified)\n" 88 | ] 89 | }, 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "package 'pryr' successfully unpacked and MD5 sums checked\n", 95 | "\n", 96 | "The downloaded binary packages are in\n", 97 | "\tC:\\Users\\v-thbeta\\AppData\\Local\\Temp\\RtmpeUeyLf\\downloaded_packages\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "install.packages(\"pryr\")" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [ 112 | { 113 | "name": "stderr", 114 | "output_type": "stream", 115 | "text": [ 116 | "Installing package into 'C:/Users/v-thbeta/Documents/R/win-library/3.3'\n", 117 | "(as 'lib' is unspecified)\n", 118 | "also installing the dependency 'hms'\n", 119 | "\n" 120 | ] 121 | }, 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "package 'hms' successfully unpacked and MD5 sums checked\n", 127 | "package 'feather' successfully unpacked and MD5 sums checked\n", 128 | "\n", 129 | "The downloaded binary packages are in\n", 130 | "\tC:\\Users\\v-thbeta\\AppData\\Local\\Temp\\RtmpeUeyLf\\downloaded_packages\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "install.packages(\"feather\")" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stderr", 147 | "output_type": "stream", 148 | "text": [ 149 | "Installing package into 'C:/Users/v-thbeta/Documents/R/win-library/3.3'\n", 150 | "(as 'lib' is unspecified)\n", 151 | "also installing the dependency 'chron'\n", 152 | "\n" 153 | ] 154 | }, 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "package 'chron' successfully unpacked and MD5 sums checked\n", 160 | "package 'data.table' successfully unpacked 
and MD5 sums checked\n", 161 | "\n", 162 | "The downloaded binary packages are in\n", 163 | "\tC:\\Users\\v-thbeta\\AppData\\Local\\Temp\\RtmpeUeyLf\\downloaded_packages\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "install.packages(\"data.table\")" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 14, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "library(data.table)\n", 180 | "library(stringr)\n", 181 | "library(feather)\n", 182 | "library(data.table)\n", 183 | "suppressMessages(library(pryr))\n", 184 | "suppressMessages(library(readr))\n", 185 | "suppressMessages(library(dtplyr))\n", 186 | "setwd(\"C://Users//v-thbeta//Desktop//examples//data//medicaid\")" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 20, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [ 196 | { 197 | "ename": "ERROR", 198 | "evalue": "Error in eval(expr, envir, enclos): could not find function \"import\"\n", 199 | "output_type": "error", 200 | "traceback": [ 201 | "Error in eval(expr, envir, enclos): could not find function \"import\"\nTraceback:\n" 202 | ] 203 | } 204 | ], 205 | "source": [ 206 | "import(data.table)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 17, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "name": "stderr", 218 | "output_type": "stream", 219 | "text": [ 220 | "Warning message:\n", 221 | "\"10612 parsing failures.\n", 222 | " row col expected actual\n", 223 | " 695 X20 no trailing characters .7\n", 224 | "2327 X20 no trailing characters .6\n", 225 | "2758 X20 no trailing characters .5\n", 226 | "3095 X20 no trailing characters .1\n", 227 | "3780 X20 no trailing characters .1\n", 228 | ".... ... ...................... ......\n", 229 | "See problems(...) 
for more details.\n", 230 | "\"" 231 | ] 232 | }, 233 | { 234 | "ename": "ERROR", 235 | "evalue": "Error in `:=`(year, year): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(\":=\").\n", 236 | "output_type": "error", 237 | "traceback": [ 238 | "Error in `:=`(year, year): Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(\":=\").\nTraceback:\n", 239 | "1. rbindlist(lapply(list.files(path = \".\", pattern = \"*.txt\"), mkdata), \n . use.names = TRUE, fill = TRUE) %>% tbl_dt", 240 | "2. eval(lhs, parent, parent)", 241 | "3. eval(expr, envir, enclos)", 242 | "4. rbindlist(lapply(list.files(path = \".\", pattern = \"*.txt\"), mkdata), \n . use.names = TRUE, fill = TRUE)", 243 | "5. lapply(list.files(path = \".\", pattern = \"*.txt\"), mkdata)", 244 | "6. FUN(X[[i]], ...)", 245 | "7. `:=`(year, year) # at line 8 of file ", 246 | "8. stop(\"Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. 
See help(\\\":=\\\").\")" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "mkdata <- function(text)\n", 252 | "{\n", 253 | " slug <- strsplit(text,\"[.]\")[[1]][1]\n", 254 | " len <- length(slug)\n", 255 | " suppressMessages(cols <- names(read_tsv(text,n_max=1)))\n", 256 | " suppressMessages(dt <- tbl_dt(read_tsv(text,skip=2,col_names=FALSE)))\n", 257 | " setnames(dt,gsub(\"_\", \"\",tolower(cols)))\n", 258 | " df[,year:=year]\n", 259 | "}\n", 260 | "md_df <-rbindlist(lapply(list.files(path = \".\", pattern=\"*.txt\"), mkdata),\n", 261 | " use.names=TRUE, fill=TRUE) %>% tbl_dt" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 19, 267 | "metadata": { 268 | "collapsed": false 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "help(\":=\")" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "collapsed": true 280 | }, 281 | "outputs": [], 282 | "source": [] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "R 3.3", 288 | "language": "R", 289 | "name": "ir33" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": "r", 293 | "file_extension": ".r", 294 | "mimetype": "text/x-r-source", 295 | "name": "R", 296 | "pygments_lexer": "r", 297 | "version": "3.3.2" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 2 302 | } 303 | -------------------------------------------------------------------------------- /Chapter0/RxSqlServerData_sqlQuery_quoting_quirks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Generate a RxSqlServerData data source object\n", 8 | "\n", 9 | "https://msdn.microsoft.com/en-us/microsoft-r/scaler/rxsqlserverdata" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | 
"library(RevoScaleR)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "#connStr\n", 32 | "instance_name <- \"TRB_MICROSOFT\"\n", 33 | "database_name <- \"WideWorldImportersDW\"\n", 34 | "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", 35 | " instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "TRUE" 49 | ], 50 | "text/latex": [ 51 | "TRUE" 52 | ], 53 | "text/markdown": [ 54 | "TRUE" 55 | ], 56 | "text/plain": [ 57 | "[1] TRUE" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", 66 | "\n", 67 | "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString = connStr, useFastRead=TRUE)\n", 68 | "rxOpen(outOdbcDS, \"w\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Incorrect syntax near 'ID'.\n", 83 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n", 84 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Incorrect syntax near 'ID'.\n", 85 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n" 86 | ] 87 | }, 88 | { 89 | "ename": "ERROR", 90 | "evalue": "Error in doTryCatch(return(expr), name, parentenv, handler): [Microsoft][SQL Server Native Client 11.0][SQL Server]Incorrect syntax near 'ID'.\n[Microsoft][SQL Server Native 
Client 11.0][SQL Server]Statement(s) could not be prepared.\n\n\n", 91 | "output_type": "error", 92 | "traceback": [ 93 | "Error in doTryCatch(return(expr), name, parentenv, handler): [Microsoft][SQL Server Native Client 11.0][SQL Server]Incorrect syntax near 'ID'.\n[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n\n\nTraceback:\n", 94 | "1. rxGetInfo(SqlQ0, getVarInfo = TRUE)", 95 | "2. rxGetInfoXdfInternal(file = data, getVarInfo = getVarInfo, getBlockSizes = getBlockSizes, \n . getValueLabels = getValueLabels, varsToKeep = varsToKeep, \n . varsToDrop = varsToDrop, startRow = startRow, numRows = numRows, \n . computeInfo = computeInfo, verbose = verbose)", 96 | "3. rxGetVarInfo(data = file, varsToKeep = varsToKeep, varsToDrop = varsToDrop, \n . getValueLabels = getValueLabels, computeInfo = computeInfo)", 97 | "4. rxGetVarInfoXdfInternal(file = data, varsToKeep = varsToKeep, \n . varsToDrop = varsToDrop, getValueLabels = getValueLabels, \n . computeInfo = computeInfo)", 98 | "5. rxGetVarInfoXdfBase(file, varsToKeep = varsToKeep, varsToDrop = varsToDrop, \n . computeInfo = computeInfo)", 99 | "6. rxCall(\"RxDataSourceGetMetaInfo\", list(DataSource = dataSource, \n . VarsToKeep = varsToKeep, VarsToDrop = varsToDrop, computeInfo = computeInfo))", 100 | "7. tryCatch(.Call(sym, ..., PACKAGE = PACKAGE), interrupt = function(x) {\n . .C(\"RxUserBreak\", ..., PACKAGE = PACKAGE)\n . stop(\"RevoScaleR function interrupted\", call. = FALSE)\n . })", 101 | "8. tryCatchList(expr, classes, parentenv, handlers)", 102 | "9. tryCatchOne(expr, names, parentenv, handlers[[1L]])", 103 | "10. 
doTryCatch(return(expr), name, parentenv, handler)" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "# Fails since the column names have embedded blanks\n", 109 | "SqlQ0 <- RxSqlServerData(sqlQuery = \"SELECT WWI Customer ID,\n", 110 | " Customer,\n", 111 | " Category,\n", 112 | " Buying Group,\n", 113 | " Postal Code\n", 114 | " FROM Dimension.Customer\",\n", 115 | " connectionString = connStr)\n", 116 | "rxGetInfo(SqlQ0, getVarInfo = TRUE)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "Connection string: Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes; \n", 130 | "Data Source: SQLSERVER \n", 131 | "Number of variables: 5 \n", 132 | "Variable information: \n", 133 | "Var 1: column1, Type: character\n", 134 | "Var 2: column2, Type: character\n", 135 | "Var 3: column3, Type: character\n", 136 | "Var 4: column4, Type: character\n", 137 | "Var 5: column5, Type: character" 138 | ] 139 | }, 140 | "metadata": {}, 141 | "output_type": "display_data" 142 | } 143 | ], 144 | "source": [ 145 | "# Undesirable behavior since single quoted column names fail to fetch the SQL/Server column names\n", 146 | "SqlQ1 <- RxSqlServerData(sqlQuery = \"SELECT 'WWI Customer ID',\n", 147 | " 'Customer',\n", 148 | " 'Category',\n", 149 | " 'Buying Group',\n", 150 | " 'Postal Code'\n", 151 | " FROM Dimension.Customer\",\n", 152 | " connectionString = connStr)\n", 153 | "rxGetInfo(SqlQ1, getVarInfo = TRUE)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 6, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "Connection string: Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes; \n", 167 | "Data Source: 
SQLSERVER \n", 168 | "Number of variables: 5 \n", 169 | "Variable information: \n", 170 | "Var 1: WWI Customer ID, Type: integer\n", 171 | "Var 2: Customer, Type: character\n", 172 | "Var 3: Category, Type: character\n", 173 | "Var 4: Buying Group, Type: character\n", 174 | "Var 5: Postal Code, Type: character" 175 | ] 176 | }, 177 | "metadata": {}, 178 | "output_type": "display_data" 179 | } 180 | ], 181 | "source": [ 182 | "#desired behavior since double quoted column names fetches the SQL Server column names\n", 183 | "SqlQ2 <- RxSqlServerData(sqlQuery = 'SELECT \"WWI Customer ID\",\n", 184 | " \"Customer\",\n", 185 | " \"Category\",\n", 186 | " \"Buying Group\",\n", 187 | " \"Postal Code\"\n", 188 | " FROM Dimension.Customer',\n", 189 | " connectionString = connStr)\n", 190 | "rxGetInfo(SqlQ2, getVarInfo = TRUE)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "rxGetInfo(SqlQ2, getVarInfo = TRUE)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "CustDf1 <- rxImport(SqlQ1)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "is.data.frame(CustDf1)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": { 239 | "collapsed": true 240 | }, 241 | "outputs": [], 242 | "source": [] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "R 3.3", 248 | "language": "R", 249 | "name": "ir33" 250 | }, 251 | "language_info": { 252 | 
"codemirror_mode": "r", 253 | "file_extension": ".r", 254 | "mimetype": "text/x-r-source", 255 | "name": "R", 256 | "pygments_lexer": "r", 257 | "version": "3.3.2" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /Data_Science_with_SQL_Server_2016_asis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Data Science with Microsoft SQL Server 2016\n", 8 | "\n", 9 | "\"file:\\\\\\C:\\Users\\v-thbeta\\Downloads\\9781509304318_Data%20Science%20with%20Microsoft%20SQL%20Server%202016_pdf.pdf\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "library(RevoScaleR)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "#connStr\n", 32 | "instance_name <- \"TRB_MICROSOFT\"\n", 33 | "database_name <- \"WideWorldImportersDW\"\n", 34 | "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", 35 | " instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 7, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "TRUE" 49 | ], 50 | "text/latex": [ 51 | "TRUE" 52 | ], 53 | "text/markdown": [ 54 | "TRUE" 55 | ], 56 | "text/plain": [ 57 | "[1] TRUE" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", 66 | "\n", 67 | "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString 
= connStr, useFastRead=TRUE)\n", 68 | "rxOpen(outOdbcDS, \"w\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 8, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# Create a variable to store the data returned from the SQL Server, with the user’s name,\n", 80 | "# a variable for the parameters to pass to the SQL Server,\n", 81 | "# the values you can pass to the RxSQLServerdata constructor\n", 82 | "sqlShareDir <- paste(\"C:\\\\temp\\\\\",Sys.getenv(\"USERNAME\"),sep=\"\")\n", 83 | "sqlWait <- TRUE\n", 84 | "sqlConsoleOutput <- FALSE" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 9, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "'C:\\temp\\v-thbeta'" 98 | ], 99 | "text/latex": [ 100 | "'C:\\textbackslash{}temp\\textbackslash{}v-thbeta'" 101 | ], 102 | "text/markdown": [ 103 | "'C:\\temp\\v-thbeta'" 104 | ], 105 | "text/plain": [ 106 | "[1] \"C:\\\\temp\\\\v-thbeta\"" 107 | ] 108 | }, 109 | "metadata": {}, 110 | "output_type": "display_data" 111 | } 112 | ], 113 | "source": [ 114 | "sqlShareDir" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 10, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Now we’ll set the compute context for the data object, using all the variables\n", 126 | "# we just created.\n", 127 | "cc <- RxInSqlServer(connectionString = connStr, shareDir = sqlShareDir, wait = sqlWait, consoleOutput =\n", 128 | "sqlConsoleOutput)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 11, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "# Next we can set the compute context to point to SQL Server R Services, defined earlier.\n", 140 | "rxSetComputeContext(cc)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 12, 
146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "# We can then construct the T-SQL query. This one simply brings back three columns.\n", 152 | "sampleDataQuery <- \"select Col1, Col2, Col3 from MyTableName\"" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 13, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "# Finally we run the query, using all of the objects set up in the script.\n", 164 | "# Note that we’re using a colClasses variable to convert the data types to something\n", 165 | "# R understands, since SQL Server has more datatypes than R, and we’re reading 500 rows\n", 166 | "# at a time.\n", 167 | "inDataSource <- RxSqlServerData(sqlQuery = sampleDataQuery, connectionString = connStr,\n", 168 | "colClasses = c(Col1 = \"numeric\", Col2 = \"numeric\", Col3 = \"numeric\"), rowsPerRead=500)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 14, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/html": [ 181 | "FALSE" 182 | ], 183 | "text/latex": [ 184 | "FALSE" 185 | ], 186 | "text/markdown": [ 187 | "FALSE" 188 | ], 189 | "text/plain": [ 190 | "[1] FALSE" 191 | ] 192 | }, 193 | "metadata": {}, 194 | "output_type": "display_data" 195 | } 196 | ], 197 | "source": [ 198 | "# If I am a new user I might assume at the end of this exercise, I would have extracted rows and columns from SQL/Server\n", 199 | "# and expected an R dataframe to be returned. 
I would have guessed wrong.\n", 200 | "is.data.frame(inDataSource)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 15, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "Formal class 'RxSqlServerData' [package \"RevoScaleR\"] with 23 slots\n", 215 | " ..@ inSqlServer : logi(0) \n", 216 | " ..@ computeSqlQueryOnly : logi(0) \n", 217 | " ..@ table : NULL\n", 218 | " ..@ sqlQuery : chr \"select Col1, Col2, Col3 from MyTableName\"\n", 219 | " ..@ useFastRead : logi TRUE\n", 220 | " ..@ trimSpace : logi TRUE\n", 221 | " ..@ server : NULL\n", 222 | " ..@ dbmsName : NULL\n", 223 | " ..@ databaseName : NULL\n", 224 | " ..@ dsn : NULL\n", 225 | " ..@ user : NULL\n", 226 | " ..@ password : NULL\n", 227 | " ..@ connectionString : chr \"Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;\"\n", 228 | " ..@ rowBuffering : logi TRUE\n", 229 | " ..@ writeFactorsAsIndexes: logi FALSE\n", 230 | " ..@ isolationLevel : NULL\n", 231 | " ..@ id : \n", 232 | " ..@ colClasses : Named chr [1:3] \"numeric\" \"numeric\" \"numeric\"\n", 233 | " ..
..- attr(*, \"names\")= chr [1:3] \"Col1\" \"Col2\" \"Col3\"\n", 234 | " ..@ colInfo : NULL\n", 235 | " ..@ returnDataFrame : logi TRUE\n", 236 | " ..@ stringsAsFactors : logi FALSE\n", 237 | " ..@ rowsOrBlocksPerRead : int 500\n", 238 | " ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' \n", 239 | " Public:\n", 240 | " assertServerCapability: function (capability, notSupported, notKnown) \n", 241 | " clone: function (deep = FALSE) \n", 242 | " deferredAssertServerCapability: function (capability, notSupported, notKnown) \n", 243 | " getRequestedCapabilities: function () \n", 244 | " initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown) \n", 245 | " merge: function (request) \n", 246 | " requestCapability: function (capability) \n", 247 | " runDeferredAssertions: function (server) \n", 248 | " serialize: function (file) \n", 249 | " Private:\n", 250 | " deferredRequests: list\n", 251 | " notKnown: function (server, capability, warningMessage) \n", 252 | " notSupported: function (server, capability, errorMessage) \n", 253 | " requestedCapabilities: \n", 254 | " runCallback: function (type, server, capability, userHandler) \n", 255 | " server: ServerDefinition, AbstractServerDefinition \n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "# For a new user, they need to begin understanding the object that was just created.\n", 261 | "str(inDataSource)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 16, 267 | "metadata": { 268 | "collapsed": false 269 | }, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Invalid object name 'MyTableName'.\n", 276 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n", 277 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Invalid object name 'MyTableName'.\n", 278 | "[Microsoft][SQL Server Native 
Client 11.0][SQL Server]Statement(s) could not be prepared.\n", 279 | "\n", 280 | "Could not open data source.\n" 281 | ] 282 | }, 283 | { 284 | "ename": "ERROR", 285 | "evalue": "Error in doTryCatch(return(expr), name, parentenv, handler): Could not open data source.\n", 286 | "output_type": "error", 287 | "traceback": [ 288 | "Error in doTryCatch(return(expr), name, parentenv, handler): Could not open data source.\nTraceback:\n", 289 | "1. rxImport(inDataSource)", 290 | "2. rxImportBase(inSource = inData, outSource = outFile, rowSelection = rowSelection, \n . transforms = transforms, transformFunc = transformFunc, transformVars = transformVars, \n . transformEnvir = transformEnvir, transformPackages = transformPackages, \n . transformObjects = transformObjects, append = append, overwrite = overwrite, \n . numRows = numRows, reportProgress = reportProgress, verbose = verbose, \n . maxRowsByCols = maxRowsByCols, xdfCompressionLevel = xdfCompressionLevel, \n . createCompositeSet = createCompositeSet, blocksPerCompositeFile = blocksPerCompositeFile)", 291 | "3. rxCall(\"RxImportDataSource\", params)", 292 | "4. tryCatch(.Call(sym, ..., PACKAGE = PACKAGE), interrupt = function(x) {\n . .C(\"RxUserBreak\", ..., PACKAGE = PACKAGE)\n . stop(\"RevoScaleR function interrupted\", call. = FALSE)\n . })", 293 | "5. tryCatchList(expr, classes, parentenv, handlers)", 294 | "6. tryCatchOne(expr, names, parentenv, handlers[[1L]])", 295 | "7. doTryCatch(return(expr), name, parentenv, handler)" 296 | ] 297 | } 298 | ], 299 | "source": [ 300 | "# A new user can't really do anything with the \"inDataSource\" object and since this chapter ends on page 14, a logical guess\n", 301 | "# is to call rxImport on the object. Unfortunately, the user is not warned the example references a non-existent table object. 
\n", 302 | "# And as part of the overall set-up, they were not told to load 'MyTableName' into SQL/Server or alter the example that maps\n", 303 | "# to an existing table!\n", 304 | "\n", 305 | "df1 <-rxImport(inDataSource)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "R 3.3", 321 | "language": "R", 322 | "name": "ir33" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": "r", 326 | "file_extension": ".r", 327 | "mimetype": "text/x-r-source", 328 | "name": "R", 329 | "pygments_lexer": "r", 330 | "version": "3.3.2" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 2 335 | } 336 | -------------------------------------------------------------------------------- /RServices_Examples/Data_Science_with_SQL_Server_2016_asis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Data Science with Microsoft SQL Server 2016\n", 8 | "\n", 9 | "\"file:\\\\\\C:\\Users\\v-thbeta\\Downloads\\9781509304318_Data%20Science%20with%20Microsoft%20SQL%20Server%202016_pdf.pdf\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "library(RevoScaleR)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "#connStr\n", 32 | "instance_name <- \"TRB_MICROSOFT\"\n", 33 | "database_name <- \"WideWorldImportersDW\"\n", 34 | "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", 35 | " instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" 36 | ] 37 | }, 38 | { 39 
| "cell_type": "code", 40 | "execution_count": 7, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "TRUE" 49 | ], 50 | "text/latex": [ 51 | "TRUE" 52 | ], 53 | "text/markdown": [ 54 | "TRUE" 55 | ], 56 | "text/plain": [ 57 | "[1] TRUE" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", 66 | "\n", 67 | "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString = connStr, useFastRead=TRUE)\n", 68 | "rxOpen(outOdbcDS, \"w\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 8, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# Create a variable to store the data returned from the SQL Server, with the user’s name,\n", 80 | "# a variable for the parameters to pass to the SQL Server,\n", 81 | "# the values you can pass to the RxSQLServerdata constructor\n", 82 | "sqlShareDir <- paste(\"C:\\\\temp\\\\\",Sys.getenv(\"USERNAME\"),sep=\"\")\n", 83 | "sqlWait <- TRUE\n", 84 | "sqlConsoleOutput <- FALSE" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 9, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "'C:\\temp\\v-thbeta'" 98 | ], 99 | "text/latex": [ 100 | "'C:\\textbackslash{}temp\\textbackslash{}v-thbeta'" 101 | ], 102 | "text/markdown": [ 103 | "'C:\\temp\\v-thbeta'" 104 | ], 105 | "text/plain": [ 106 | "[1] \"C:\\\\temp\\\\v-thbeta\"" 107 | ] 108 | }, 109 | "metadata": {}, 110 | "output_type": "display_data" 111 | } 112 | ], 113 | "source": [ 114 | "sqlShareDir" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 10, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Now we’ll set the compute context for the data 
object, using all the variables\n", 126 | "# we just created.\n", 127 | "cc <- RxInSqlServer(connectionString = connStr, shareDir = sqlShareDir, wait = sqlWait, consoleOutput =\n", 128 | "sqlConsoleOutput)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 11, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "# Next we can set the compute context to point to SQL Server R Services, defined earlier.\n", 140 | "rxSetComputeContext(cc)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 12, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "# We can then construct the T-SQL query. This one simply brings back three columns.\n", 152 | "sampleDataQuery <- \"select Col1, Col2, Col3 from MyTableName\"" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 13, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "# Finally we run the query, using all of the objects set up in the script.\n", 164 | "# Note that we’re using a colClasses variable to convert the data types to something\n", 165 | "# R understands, since SQL Server has more datatypes than R, and we’re reading 500 rows\n", 166 | "# at a time.\n", 167 | "inDataSource <- RxSqlServerData(sqlQuery = sampleDataQuery, connectionString = connStr,\n", 168 | "colClasses = c(Col1 = \"numeric\", Col2 = \"numeric\", Col3 = \"numeric\"), rowsPerRead=500)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 14, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/html": [ 181 | "FALSE" 182 | ], 183 | "text/latex": [ 184 | "FALSE" 185 | ], 186 | "text/markdown": [ 187 | "FALSE" 188 | ], 189 | "text/plain": [ 190 | "[1] FALSE" 191 | ] 192 | }, 193 | "metadata": {}, 194 | "output_type": "display_data" 195 | } 196 | ], 197 | "source": [ 198 
| "# If I am a new user I might assume at the end of this exercise, I would have extracted rows and columns from SQL/Server\n", 199 | "# and expected an R dataframe to be returned. I would have guessed wrong.\n", 200 | "is.data.frame(inDataSource)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 15, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "Formal class 'RxSqlServerData' [package \"RevoScaleR\"] with 23 slots\n", 215 | " ..@ inSqlServer : logi(0) \n", 216 | " ..@ computeSqlQueryOnly : logi(0) \n", 217 | " ..@ table : NULL\n", 218 | " ..@ sqlQuery : chr \"select Col1, Col2, Col3 from MyTableName\"\n", 219 | " ..@ useFastRead : logi TRUE\n", 220 | " ..@ trimSpace : logi TRUE\n", 221 | " ..@ server : NULL\n", 222 | " ..@ dbmsName : NULL\n", 223 | " ..@ databaseName : NULL\n", 224 | " ..@ dsn : NULL\n", 225 | " ..@ user : NULL\n", 226 | " ..@ password : NULL\n", 227 | " ..@ connectionString : chr \"Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;\"\n", 228 | " ..@ rowBuffering : logi TRUE\n", 229 | " ..@ writeFactorsAsIndexes: logi FALSE\n", 230 | " ..@ isolationLevel : NULL\n", 231 | " ..@ id : \n", 232 | " ..@ colClasses : Named chr [1:3] \"numeric\" \"numeric\" \"numeric\"\n", 233 | " ..
..- attr(*, \"names\")= chr [1:3] \"Col1\" \"Col2\" \"Col3\"\n", 234 | " ..@ colInfo : NULL\n", 235 | " ..@ returnDataFrame : logi TRUE\n", 236 | " ..@ stringsAsFactors : logi FALSE\n", 237 | " ..@ rowsOrBlocksPerRead : int 500\n", 238 | " ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' \n", 239 | " Public:\n", 240 | " assertServerCapability: function (capability, notSupported, notKnown) \n", 241 | " clone: function (deep = FALSE) \n", 242 | " deferredAssertServerCapability: function (capability, notSupported, notKnown) \n", 243 | " getRequestedCapabilities: function () \n", 244 | " initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown) \n", 245 | " merge: function (request) \n", 246 | " requestCapability: function (capability) \n", 247 | " runDeferredAssertions: function (server) \n", 248 | " serialize: function (file) \n", 249 | " Private:\n", 250 | " deferredRequests: list\n", 251 | " notKnown: function (server, capability, warningMessage) \n", 252 | " notSupported: function (server, capability, errorMessage) \n", 253 | " requestedCapabilities: \n", 254 | " runCallback: function (type, server, capability, userHandler) \n", 255 | " server: ServerDefinition, AbstractServerDefinition \n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "# For a new user, they need to begin understanding the object that was just created.\n", 261 | "str(inDataSource)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 16, 267 | "metadata": { 268 | "collapsed": false 269 | }, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Invalid object name 'MyTableName'.\n", 276 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n", 277 | "[Microsoft][SQL Server Native Client 11.0][SQL Server]Invalid object name 'MyTableName'.\n", 278 | "[Microsoft][SQL Server Native 
Client 11.0][SQL Server]Statement(s) could not be prepared.\n", 279 | "\n", 280 | "Could not open data source.\n" 281 | ] 282 | }, 283 | { 284 | "ename": "ERROR", 285 | "evalue": "Error in doTryCatch(return(expr), name, parentenv, handler): Could not open data source.\n", 286 | "output_type": "error", 287 | "traceback": [ 288 | "Error in doTryCatch(return(expr), name, parentenv, handler): Could not open data source.\nTraceback:\n", 289 | "1. rxImport(inDataSource)", 290 | "2. rxImportBase(inSource = inData, outSource = outFile, rowSelection = rowSelection, \n . transforms = transforms, transformFunc = transformFunc, transformVars = transformVars, \n . transformEnvir = transformEnvir, transformPackages = transformPackages, \n . transformObjects = transformObjects, append = append, overwrite = overwrite, \n . numRows = numRows, reportProgress = reportProgress, verbose = verbose, \n . maxRowsByCols = maxRowsByCols, xdfCompressionLevel = xdfCompressionLevel, \n . createCompositeSet = createCompositeSet, blocksPerCompositeFile = blocksPerCompositeFile)", 291 | "3. rxCall(\"RxImportDataSource\", params)", 292 | "4. tryCatch(.Call(sym, ..., PACKAGE = PACKAGE), interrupt = function(x) {\n . .C(\"RxUserBreak\", ..., PACKAGE = PACKAGE)\n . stop(\"RevoScaleR function interrupted\", call. = FALSE)\n . })", 293 | "5. tryCatchList(expr, classes, parentenv, handlers)", 294 | "6. tryCatchOne(expr, names, parentenv, handlers[[1L]])", 295 | "7. doTryCatch(return(expr), name, parentenv, handler)" 296 | ] 297 | } 298 | ], 299 | "source": [ 300 | "# A new user can't really do anything with the \"inDataSource\" object and since this chapter ends on page 14, a logical guess\n", 301 | "# is to call rxImport on the object. Unfortunately, the user is not warned the example references a non-existent table object. 
\n", 302 | "# And as part of the overall set-up, they were not told to load 'MyTableName' into SQL/Server or alter the example that maps\n", 303 | "# to an existing table!\n", 304 | "\n", 305 | "df1 <-rxImport(inDataSource)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "R 3.3", 321 | "language": "R", 322 | "name": "ir33" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": "r", 326 | "file_extension": ".r", 327 | "mimetype": "text/x-r-source", 328 | "name": "R", 329 | "pygments_lexer": "r", 330 | "version": "3.3.2" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 2 335 | } 336 | -------------------------------------------------------------------------------- /MSFT_dplyr01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#from the:\n", 12 | "http://www.listendata.com/2016/08/dplyr-tutorial.html\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# dplyr package was written by the most popular R programmer Hadley Wickham who has written many \n", 24 | "# useful R packages such as ggplot2, tidyr etc.\n", 25 | "# Two amazing hires for MST would be \n", 26 | "1. Hadley Wickham -> R Fame\n", 27 | "2.
Wes McKinney -> Python pandas Fame \n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stderr", 39 | "output_type": "stream", 40 | "text": [ 41 | "Installing package into 'C:/Users/v-thbeta/Documents/R/win-library/3.3'\n", 42 | "(as 'lib' is unspecified)\n", 43 | "also installing the dependencies 'assertthat', 'tibble', 'lazyeval', 'DBI', 'BH'\n", 44 | "\n" 45 | ] 46 | }, 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "package 'assertthat' successfully unpacked and MD5 sums checked\n", 52 | "package 'tibble' successfully unpacked and MD5 sums checked\n", 53 | "package 'lazyeval' successfully unpacked and MD5 sums checked\n", 54 | "package 'DBI' successfully unpacked and MD5 sums checked\n", 55 | "package 'BH' successfully unpacked and MD5 sums checked\n", 56 | "package 'dplyr' successfully unpacked and MD5 sums checked\n", 57 | "\n", 58 | "The downloaded binary packages are in\n", 59 | "\tC:\\Users\\v-thbeta\\AppData\\Local\\Temp\\RtmpiObDvN\\downloaded_packages\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "install.packages(\"dplyr\")" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 2, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [ 74 | { 75 | "name": "stderr", 76 | "output_type": "stream", 77 | "text": [ 78 | "\n", 79 | "Attaching package: 'dplyr'\n", 80 | "\n", 81 | "The following objects are masked from 'package:stats':\n", 82 | "\n", 83 | " filter, lag\n", 84 | "\n", 85 | "The following objects are masked from 'package:base':\n", 86 | "\n", 87 | " intersect, setdiff, setequal, union\n", 88 | "\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "library(dplyr)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "df <-
read.csv(\"c:\\\\Users\\\\v-thbeta\\\\Desktop\\\\examples\\\\data\\\\states_income\\\\sampledata.csv\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 7, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Observations: 51\n", 119 | "Variables: 16\n", 120 | "$ Index A, A, A, A, C, C, C, D, D, F, G, H, I, I, I, I, K, K, L, M, ...\n", 121 | "$ State Alabama, Alaska, Arizona, Arkansas, California, Colorado, Co...\n", 122 | "$ Y2002 1296530, 1170302, 1742027, 1485531, 1685349, 1343824, 1610512...\n", 123 | "$ Y2003 1317711, 1960378, 1968140, 1994927, 1675807, 1878473, 1232844...\n", 124 | "$ Y2004 1118631, 1818085, 1377583, 1119299, 1889570, 1886149, 1181949...\n", 125 | "$ Y2005 1492583, 1447852, 1782199, 1947979, 1480280, 1236697, 1518933...\n", 126 | "$ Y2006 1107408, 1861639, 1102568, 1669191, 1735069, 1871471, 1841266...\n", 127 | "$ Y2007 1440134, 1465841, 1109382, 1801213, 1812546, 1814218, 1976976...\n", 128 | "$ Y2008 1945229, 1551826, 1752886, 1188104, 1487315, 1875146, 1764457...\n", 129 | "$ Y2009 1944173, 1436541, 1554330, 1628980, 1663809, 1752387, 1972730...\n", 130 | "$ Y2010 1237582, 1629616, 1300521, 1669295, 1624509, 1913275, 1968730...\n", 131 | "$ Y2011 1440756, 1230866, 1130709, 1928238, 1639670, 1665877, 1945524...\n", 132 | "$ Y2012 1186741, 1512804, 1907284, 1216675, 1921845, 1491604, 1228529...\n", 133 | "$ Y2013 1852841, 1985302, 1363279, 1591896, 1156536, 1178355, 1582249...\n", 134 | "$ Y2014 1558906, 1580394, 1525866, 1360959, 1388461, 1383978, 1503156...\n", 135 | "$ Y2015 1916661, 1979143, 1647724, 1329341, 1644607, 1330736, 1718072...\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "glimpse(df)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/html": [ 153 | "
    \n", 154 | "\t
  1. 51
  2. \n", 155 | "\t
  3. 16
  4. \n", 156 | "
\n" 157 | ], 158 | "text/latex": [ 159 | "\\begin{enumerate*}\n", 160 | "\\item 51\n", 161 | "\\item 16\n", 162 | "\\end{enumerate*}\n" 163 | ], 164 | "text/markdown": [ 165 | "1. 51\n", 166 | "2. 16\n", 167 | "\n", 168 | "\n" 169 | ], 170 | "text/plain": [ 171 | "[1] 51 16" 172 | ] 173 | }, 174 | "metadata": {}, 175 | "output_type": "display_data" 176 | } 177 | ], 178 | "source": [ 179 | "dim(df)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/html": [ 192 | "\n", 193 | "\n", 194 | "\n", 195 | "\t\n", 196 | "\t\n", 197 | "\t\n", 198 | "\t\n", 199 | "\t\n", 200 | "\n", 201 | "
IndexStateY2002Y2003Y2004Y2005Y2006Y2007Y2008Y2009Y2010Y2011Y2012Y2013Y2014Y2015
6C Colorado 1343824 1878473 1886149 1236697 1871471 1814218 1875146 1752387 1913275 1665877 1491604 1178355 1383978 1330736
32N New Mexico 1819239 1226057 1935991 1124400 1723493 1475985 1237704 1820856 1801430 1653384 1475715 1623388 1533494 1868612
36O Ohio 1802132 1648498 1441386 1670280 1534888 1314824 1516621 1511460 1585465 1887714 1227303 1840898 1880804 1573117
45U Utah 1771096 1195861 1979395 1241662 1437456 1859416 1939284 1915865 1619186 1288285 1108281 1123353 1801019 1729273
41S South Carolina1631522 1803455 1425193 1458191 1538731 1825195 1250499 1864685 1345102 1116203 1532332 1591735 1188417 1110655
\n" 202 | ], 203 | "text/latex": [ 204 | "\\begin{tabular}{r|llllllllllllllll}\n", 205 | " & Index & State & Y2002 & Y2003 & Y2004 & Y2005 & Y2006 & Y2007 & Y2008 & Y2009 & Y2010 & Y2011 & Y2012 & Y2013 & Y2014 & Y2015\\\\\n", 206 | "\\hline\n", 207 | "\t6 & C & Colorado & 1343824 & 1878473 & 1886149 & 1236697 & 1871471 & 1814218 & 1875146 & 1752387 & 1913275 & 1665877 & 1491604 & 1178355 & 1383978 & 1330736 \\\\\n", 208 | "\t32 & N & New Mexico & 1819239 & 1226057 & 1935991 & 1124400 & 1723493 & 1475985 & 1237704 & 1820856 & 1801430 & 1653384 & 1475715 & 1623388 & 1533494 & 1868612 \\\\\n", 209 | "\t36 & O & Ohio & 1802132 & 1648498 & 1441386 & 1670280 & 1534888 & 1314824 & 1516621 & 1511460 & 1585465 & 1887714 & 1227303 & 1840898 & 1880804 & 1573117 \\\\\n", 210 | "\t45 & U & Utah & 1771096 & 1195861 & 1979395 & 1241662 & 1437456 & 1859416 & 1939284 & 1915865 & 1619186 & 1288285 & 1108281 & 1123353 & 1801019 & 1729273 \\\\\n", 211 | "\t41 & S & South Carolina & 1631522 & 1803455 & 1425193 & 1458191 & 1538731 & 1825195 & 1250499 & 1864685 & 1345102 & 1116203 & 1532332 & 1591735 & 1188417 & 1110655 \\\\\n", 212 | "\\end{tabular}\n" 213 | ], 214 | "text/plain": [ 215 | " Index State Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008 \n", 216 | "6 C Colorado 1343824 1878473 1886149 1236697 1871471 1814218 1875146\n", 217 | "32 N New Mexico 1819239 1226057 1935991 1124400 1723493 1475985 1237704\n", 218 | "36 O Ohio 1802132 1648498 1441386 1670280 1534888 1314824 1516621\n", 219 | "45 U Utah 1771096 1195861 1979395 1241662 1437456 1859416 1939284\n", 220 | "41 S South Carolina 1631522 1803455 1425193 1458191 1538731 1825195 1250499\n", 221 | " Y2009 Y2010 Y2011 Y2012 Y2013 Y2014 Y2015 \n", 222 | "6 1752387 1913275 1665877 1491604 1178355 1383978 1330736\n", 223 | "32 1820856 1801430 1653384 1475715 1623388 1533494 1868612\n", 224 | "36 1511460 1585465 1887714 1227303 1840898 1880804 1573117\n", 225 | "45 1915865 1619186 1288285 1108281 1123353 1801019 1729273\n", 226 | "41 
1864685 1345102 1116203 1532332 1591735 1188417 1110655" 227 | ] 228 | }, 229 | "metadata": {}, 230 | "output_type": "display_data" 231 | } 232 | ], 233 | "source": [ 234 | "sample_n(df,5)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 10, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/html": [ 247 | "\n", 248 | "\n", 249 | "\n", 250 | "\t\n", 251 | "\t\n", 252 | "\t\n", 253 | "\t\n", 254 | "\t\n", 255 | "\n", 256 | "
IndexStateY2002Y2003Y2004Y2005Y2006Y2007Y2008Y2009Y2010Y2011Y2012Y2013Y2014Y2015
35N North Dakota 1618807 1510193 1876940 1443172 1425030 1868788 1720352 1671468 1534571 1271132 1430978 1529024 1563898 1604118
34N North Carolina1616742 1292223 1482792 1532347 1158716 1827420 1267737 1116168 1791535 1553750 1472258 1104893 1596452 1229085
18K Kentucky 1813878 1448846 1800760 1250524 1137913 1911227 1301848 1956681 1350895 1512894 1916616 1878271 1722762 1913350
28N Nebraska 1885081 1309769 1425527 1240465 1500594 1278272 1140598 1270585 1128711 1187207 1569665 1690920 1459243 1802211
6C Colorado 1343824 1878473 1886149 1236697 1871471 1814218 1875146 1752387 1913275 1665877 1491604 1178355 1383978 1330736
\n" 257 | ], 258 | "text/latex": [ 259 | "\\begin{tabular}{r|llllllllllllllll}\n", 260 | " & Index & State & Y2002 & Y2003 & Y2004 & Y2005 & Y2006 & Y2007 & Y2008 & Y2009 & Y2010 & Y2011 & Y2012 & Y2013 & Y2014 & Y2015\\\\\n", 261 | "\\hline\n", 262 | "\t35 & N & North Dakota & 1618807 & 1510193 & 1876940 & 1443172 & 1425030 & 1868788 & 1720352 & 1671468 & 1534571 & 1271132 & 1430978 & 1529024 & 1563898 & 1604118 \\\\\n", 263 | "\t34 & N & North Carolina & 1616742 & 1292223 & 1482792 & 1532347 & 1158716 & 1827420 & 1267737 & 1116168 & 1791535 & 1553750 & 1472258 & 1104893 & 1596452 & 1229085 \\\\\n", 264 | "\t18 & K & Kentucky & 1813878 & 1448846 & 1800760 & 1250524 & 1137913 & 1911227 & 1301848 & 1956681 & 1350895 & 1512894 & 1916616 & 1878271 & 1722762 & 1913350 \\\\\n", 265 | "\t28 & N & Nebraska & 1885081 & 1309769 & 1425527 & 1240465 & 1500594 & 1278272 & 1140598 & 1270585 & 1128711 & 1187207 & 1569665 & 1690920 & 1459243 & 1802211 \\\\\n", 266 | "\t6 & C & Colorado & 1343824 & 1878473 & 1886149 & 1236697 & 1871471 & 1814218 & 1875146 & 1752387 & 1913275 & 1665877 & 1491604 & 1178355 & 1383978 & 1330736 \\\\\n", 267 | "\\end{tabular}\n" 268 | ], 269 | "text/plain": [ 270 | " Index State Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008 \n", 271 | "35 N North Dakota 1618807 1510193 1876940 1443172 1425030 1868788 1720352\n", 272 | "34 N North Carolina 1616742 1292223 1482792 1532347 1158716 1827420 1267737\n", 273 | "18 K Kentucky 1813878 1448846 1800760 1250524 1137913 1911227 1301848\n", 274 | "28 N Nebraska 1885081 1309769 1425527 1240465 1500594 1278272 1140598\n", 275 | "6 C Colorado 1343824 1878473 1886149 1236697 1871471 1814218 1875146\n", 276 | " Y2009 Y2010 Y2011 Y2012 Y2013 Y2014 Y2015 \n", 277 | "35 1671468 1534571 1271132 1430978 1529024 1563898 1604118\n", 278 | "34 1116168 1791535 1553750 1472258 1104893 1596452 1229085\n", 279 | "18 1956681 1350895 1512894 1916616 1878271 1722762 1913350\n", 280 | "28 1270585 1128711 1187207 1569665 1690920 1459243 
1802211\n", 281 | "6 1752387 1913275 1665877 1491604 1178355 1383978 1330736" 282 | ] 283 | }, 284 | "metadata": {}, 285 | "output_type": "display_data" 286 | } 287 | ], 288 | "source": [ 289 | "sample_n(df,5)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 11, 295 | "metadata": { 296 | "collapsed": true 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "dedup = distinct(df)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 12, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "'data.frame':\t51 obs. of 16 variables:\n", 315 | " $ Index: Factor w/ 19 levels \"A\",\"C\",\"D\",\"F\",..: 1 1 1 1 2 2 2 3 3 4 ...\n", 316 | " $ State: Factor w/ 51 levels \"Alabama\",\"Alaska\",..: 1 2 3 4 5 6 7 8 9 10 ...\n", 317 | " $ Y2002: int 1296530 1170302 1742027 1485531 1685349 1343824 1610512 1330403 1111437 1964626 ...\n", 318 | " $ Y2003: int 1317711 1960378 1968140 1994927 1675807 1878473 1232844 1268673 1993741 1468852 ...\n", 319 | " $ Y2004: int 1118631 1818085 1377583 1119299 1889570 1886149 1181949 1706751 1374643 1419738 ...\n", 320 | " $ Y2005: int 1492583 1447852 1782199 1947979 1480280 1236697 1518933 1403759 1827949 1362787 ...\n", 321 | " $ Y2006: int 1107408 1861639 1102568 1669191 1735069 1871471 1841266 1441351 1803852 1339608 ...\n", 322 | " $ Y2007: int 1440134 1465841 1109382 1801213 1812546 1814218 1976976 1300836 1595981 1278550 ...\n", 323 | " $ Y2008: int 1945229 1551826 1752886 1188104 1487315 1875146 1764457 1762096 1193245 1756185 ...\n", 324 | " $ Y2009: int 1944173 1436541 1554330 1628980 1663809 1752387 1972730 1553585 1739748 1818438 ...\n", 325 | " $ Y2010: int 1237582 1629616 1300521 1669295 1624509 1913275 1968730 1370984 1707823 1198403 ...\n", 326 | " $ Y2011: int 1440756 1230866 1130709 1928238 1639670 1665877 1945524 1318669 1353449 1497051 ...\n", 327 | " $ Y2012: int 
1186741 1512804 1907284 1216675 1921845 1491604 1228529 1984027 1979708 1131928 ...\n", 328 | " $ Y2013: int 1852841 1985302 1363279 1591896 1156536 1178355 1582249 1671279 1912654 1107448 ...\n", 329 | " $ Y2014: int 1558906 1580394 1525866 1360959 1388461 1383978 1503156 1803169 1782169 1407784 ...\n", 330 | " $ Y2015: int 1916661 1979143 1647724 1329341 1644607 1330736 1718072 1627508 1410183 1170389 ...\n" 331 | ] 332 | } 333 | ], 334 | "source": [ 335 | "str(dedup)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 13, 341 | "metadata": { 342 | "collapsed": true 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "dedup2 = distinct(df, Index, .keep_all=TRUE)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 14, 352 | "metadata": { 353 | "collapsed": false 354 | }, 355 | "outputs": [ 356 | { 357 | "name": "stdout", 358 | "output_type": "stream", 359 | "text": [ 360 | "'data.frame':\t51 obs. of 16 variables:\n", 361 | " $ Index: Factor w/ 19 levels \"A\",\"C\",\"D\",\"F\",..: 1 1 1 1 2 2 2 3 3 4 ...\n", 362 | " $ State: Factor w/ 51 levels \"Alabama\",\"Alaska\",..: 1 2 3 4 5 6 7 8 9 10 ...\n", 363 | " $ Y2002: int 1296530 1170302 1742027 1485531 1685349 1343824 1610512 1330403 1111437 1964626 ...\n", 364 | " $ Y2003: int 1317711 1960378 1968140 1994927 1675807 1878473 1232844 1268673 1993741 1468852 ...\n", 365 | " $ Y2004: int 1118631 1818085 1377583 1119299 1889570 1886149 1181949 1706751 1374643 1419738 ...\n", 366 | " $ Y2005: int 1492583 1447852 1782199 1947979 1480280 1236697 1518933 1403759 1827949 1362787 ...\n", 367 | " $ Y2006: int 1107408 1861639 1102568 1669191 1735069 1871471 1841266 1441351 1803852 1339608 ...\n", 368 | " $ Y2007: int 1440134 1465841 1109382 1801213 1812546 1814218 1976976 1300836 1595981 1278550 ...\n", 369 | " $ Y2008: int 1945229 1551826 1752886 1188104 1487315 1875146 1764457 1762096 1193245 1756185 ...\n", 370 | " $ Y2009: int 1944173 1436541 1554330 1628980 1663809 
1752387 1972730 1553585 1739748 1818438 ...\n", 371 | " $ Y2010: int 1237582 1629616 1300521 1669295 1624509 1913275 1968730 1370984 1707823 1198403 ...\n", 372 | " $ Y2011: int 1440756 1230866 1130709 1928238 1639670 1665877 1945524 1318669 1353449 1497051 ...\n", 373 | " $ Y2012: int 1186741 1512804 1907284 1216675 1921845 1491604 1228529 1984027 1979708 1131928 ...\n", 374 | " $ Y2013: int 1852841 1985302 1363279 1591896 1156536 1178355 1582249 1671279 1912654 1107448 ...\n", 375 | " $ Y2014: int 1558906 1580394 1525866 1360959 1388461 1383978 1503156 1803169 1782169 1407784 ...\n", 376 | " $ Y2015: int 1916661 1979143 1647724 1329341 1644607 1330736 1718072 1627508 1410183 1170389 ...\n" 377 | ] 378 | } 379 | ], 380 | "source": [ 381 | "str(dedup)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 19, 387 | "metadata": { 388 | "collapsed": false 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "df2 = select(df, Index, State:Y2008, Y2015)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": { 399 | "collapsed": true 400 | }, 401 | "outputs": [], 402 | "source": [ 403 | "# Expecting to see column Y2015 as part of the results-set" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 17, 409 | "metadata": { 410 | "collapsed": false 411 | }, 412 | "outputs": [ 413 | { 414 | "name": "stdout", 415 | "output_type": "stream", 416 | "text": [ 417 | "'data.frame':\t51 obs. 
of 9 variables:\n", 418 | " $ Index: Factor w/ 19 levels \"A\",\"C\",\"D\",\"F\",..: 1 1 1 1 2 2 2 3 3 4 ...\n", 419 | " $ State: Factor w/ 51 levels \"Alabama\",\"Alaska\",..: 1 2 3 4 5 6 7 8 9 10 ...\n", 420 | " $ Y2002: int 1296530 1170302 1742027 1485531 1685349 1343824 1610512 1330403 1111437 1964626 ...\n", 421 | " $ Y2003: int 1317711 1960378 1968140 1994927 1675807 1878473 1232844 1268673 1993741 1468852 ...\n", 422 | " $ Y2004: int 1118631 1818085 1377583 1119299 1889570 1886149 1181949 1706751 1374643 1419738 ...\n", 423 | " $ Y2005: int 1492583 1447852 1782199 1947979 1480280 1236697 1518933 1403759 1827949 1362787 ...\n", 424 | " $ Y2006: int 1107408 1861639 1102568 1669191 1735069 1871471 1841266 1441351 1803852 1339608 ...\n", 425 | " $ Y2007: int 1440134 1465841 1109382 1801213 1812546 1814218 1976976 1300836 1595981 1278550 ...\n", 426 | " $ Y2008: int 1945229 1551826 1752886 1188104 1487315 1875146 1764457 1762096 1193245 1756185 ...\n" 427 | ] 428 | } 429 | ], 430 | "source": [ 431 | "str(df2)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": { 438 | "collapsed": true 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "# A set of verbs for operations such as applying filter, selecting specific columns, \n", 443 | "# sorting data, adding or deleting columns and aggregating data. 
" 444 | ] 445 | } 446 | ], 447 | "metadata": { 448 | "kernelspec": { 449 | "display_name": "R 3.3", 450 | "language": "R", 451 | "name": "ir33" 452 | }, 453 | "language_info": { 454 | "codemirror_mode": "r", 455 | "file_extension": ".r", 456 | "mimetype": "text/x-r-source", 457 | "name": "R", 458 | "pygments_lexer": "r", 459 | "version": "3.3.2" 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 2 464 | } 465 | -------------------------------------------------------------------------------- /RServices_Examples/Data_Science_with_SQL_Server_2016_could_be.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Data Science with Microsoft SQL Server 2016\n", 8 | "\n", 9 | "\"file:\\\\\\C:\\Users\\v-thbeta\\Downloads\\9781509304318_Data%20Science%20with%20Microsoft%20SQL%20Server%202016_pdf.pdf\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "library(RevoScaleR)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "#connStr\n", 32 | "instance_name <- \"TRB_MICROSOFT\"\n", 33 | "database_name <- \"WideWorldImportersDW\"\n", 34 | "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", 35 | " instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "TRUE" 49 | ], 50 | "text/latex": [ 51 | "TRUE" 52 | ], 53 | "text/markdown": [ 54 | "TRUE" 55 | ], 56 | "text/plain": [ 57 | "[1] TRUE" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | 
], 64 | "source": [ 65 | "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", 66 | "\n", 67 | "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString = connStr, useFastRead=TRUE)\n", 68 | "rxOpen(outOdbcDS, \"w\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# Create a variable to store the data returned from the SQL Server, with the user’s name,\n", 80 | "# a variable for the parameters to pass to the SQL Server,\n", 81 | "# the values you can pass to the RxSQLServerdata constructor\n", 82 | "sqlShareDir <- paste(\"C:\\\\temp\\\\\",Sys.getenv(\"USERNAME\"),sep=\"\")\n", 83 | "sqlWait <- TRUE\n", 84 | "sqlConsoleOutput <- FALSE" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "'C:\\temp\\v-thbeta'" 98 | ], 99 | "text/latex": [ 100 | "'C:\\textbackslash{}temp\\textbackslash{}v-thbeta'" 101 | ], 102 | "text/markdown": [ 103 | "'C:\\temp\\v-thbeta'" 104 | ], 105 | "text/plain": [ 106 | "[1] \"C:\\\\temp\\\\v-thbeta\"" 107 | ] 108 | }, 109 | "metadata": {}, 110 | "output_type": "display_data" 111 | } 112 | ], 113 | "source": [ 114 | "sqlShareDir" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Now we’ll set the compute context for the data object, using all the variables\n", 126 | "# we just created.\n", 127 | "cc <- RxInSqlServer(connectionString = connStr, shareDir = sqlShareDir, wait = sqlWait, consoleOutput =\n", 128 | "sqlConsoleOutput)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 7, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | 
"# Next we can set the compute context to point to SQL Server R Services, defined earlier.\n", 140 | "rxSetComputeContext(cc)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "# We can then construct the T-SQL query. This one simply brings back three columns. \n", 152 | "# This example would work if the user happened to have a SQL/Server table named MyTableName with columns \"Col1\", \"Col2\", \"Col3\"\n", 153 | "# The authors assumed that users would be clever enough to realize this is not the case\n", 154 | "#sampleDataQuery <- \"select Col1, Col2, Col3 from MyTableName\"" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 9, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "# What if there were a canonical database that was used for all the R services examples? Afterall, there is the \n", 166 | "# \"AdventureWorks\" database for previous releases of SQL/Server. And there is the \"WideWorldImportersDW\" database for \n", 167 | "# SQL/Server 2016. 
Why not just tell users to go fetch this database Microsoft has on github at:\n", 168 | "# https://github.com/Microsoft/sql-server-samples/tree/master/samples/databases/wide-world-importers" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "metadata": { 175 | "collapsed": true 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "# After we told the user to load the \"WideWorldImportersDW\" database, we can construct queries with a known database\n", 180 | "sampleDataQuery <- \"select [Customer Key], [Total Excluding Tax], [Tax Amount] FROM [WideWorldImportersDW].[Fact].[Transaction]\"" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 11, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# Finally we run the query, using all of the objects set up in the script.\n", 192 | "# Note that we’re using a colClasses variable to convert the data types to something\n", 193 | "# R understands, since SQL Server has more datatypes than R, and we’re reading 500 rows\n", 194 | "# at a time.\n", 195 | "inDataSource <- RxSqlServerData(sqlQuery = sampleDataQuery, connectionString = connStr,\n", 196 | "colClasses = c(Col1 = \"numeric\", Col2 = \"numeric\", Col3 = \"numeric\"), rowsPerRead=500)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 12, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/html": [ 209 | "FALSE" 210 | ], 211 | "text/latex": [ 212 | "FALSE" 213 | ], 214 | "text/markdown": [ 215 | "FALSE" 216 | ], 217 | "text/plain": [ 218 | "[1] FALSE" 219 | ] 220 | }, 221 | "metadata": {}, 222 | "output_type": "display_data" 223 | } 224 | ], 225 | "source": [ 226 | "# If I am a new user I might assume at the end of this exercise, I would have extracted rows and columns from SQL/Server\n", 227 | "# and expected an R dataframe to be returned. 
I would have guessed wrong.\n", 228 | "is.data.frame(inDataSource)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 13, 234 | "metadata": { 235 | "collapsed": false 236 | }, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "Formal class 'RxSqlServerData' [package \"RevoScaleR\"] with 23 slots\n", 243 | " ..@ inSqlServer : logi(0) \n", 244 | " ..@ computeSqlQueryOnly : logi(0) \n", 245 | " ..@ table : NULL\n", 246 | " ..@ sqlQuery : chr \"select [Customer Key], [Total Excluding Tax], [Tax Amount] FROM [WideWorldImportersDW].[Fact].[Transaction]\"\n", 247 | " ..@ useFastRead : logi TRUE\n", 248 | " ..@ trimSpace : logi TRUE\n", 249 | " ..@ server : NULL\n", 250 | " ..@ dbmsName : NULL\n", 251 | " ..@ databaseName : NULL\n", 252 | " ..@ dsn : NULL\n", 253 | " ..@ user : NULL\n", 254 | " ..@ password : NULL\n", 255 | " ..@ connectionString : chr \"Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;\"\n", 256 | " ..@ rowBuffering : logi TRUE\n", 257 | " ..@ writeFactorsAsIndexes: logi FALSE\n", 258 | " ..@ isolationLevel : NULL\n", 259 | " ..@ id : \n", 260 | " ..@ colClasses : Named chr [1:3] \"numeric\" \"numeric\" \"numeric\"\n", 261 | " .. 
..- attr(*, \"names\")= chr [1:3] \"Col1\" \"Col2\" \"Col3\"\n", 262 | " ..@ colInfo : NULL\n", 263 | " ..@ returnDataFrame : logi TRUE\n", 264 | " ..@ stringsAsFactors : logi FALSE\n", 265 | " ..@ rowsOrBlocksPerRead : int 500\n", 266 | " ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' \n", 267 | " Public:\n", 268 | " assertServerCapability: function (capability, notSupported, notKnown) \n", 269 | " clone: function (deep = FALSE) \n", 270 | " deferredAssertServerCapability: function (capability, notSupported, notKnown) \n", 271 | " getRequestedCapabilities: function () \n", 272 | " initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown) \n", 273 | " merge: function (request) \n", 274 | " requestCapability: function (capability) \n", 275 | " runDeferredAssertions: function (server) \n", 276 | " serialize: function (file) \n", 277 | " Private:\n", 278 | " deferredRequests: list\n", 279 | " notKnown: function (server, capability, warningMessage) \n", 280 | " notSupported: function (server, capability, errorMessage) \n", 281 | " requestedCapabilities: \n", 282 | " runCallback: function (type, server, capability, userHandler) \n", 283 | " server: ServerDefinition, AbstractServerDefinition \n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "# For a new user, they need to begin understanding the object that was just created.\n", 289 | "str(inDataSource)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 14, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [ 299 | { 300 | "name": "stdout", 301 | "output_type": "stream", 302 | "text": [ 303 | "Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.035 seconds\n", 304 | "Rows Read: 500, Total Rows Processed: 1000, Total Chunk Time: 0.001 seconds\n", 305 | "Rows Read: 500, Total Rows Processed: 1500, Total Chunk Time: 0.001 seconds\n", 306 | "Rows Read: 500, Total Rows Processed: 2000, Total Chunk Time: Less than 
.001 seconds\n", 307 | "Rows Read: 500, Total Rows Processed: 2500, Total Chunk Time: 0.001 seconds\n", 308 | "Rows Read: 500, Total Rows Processed: 3000, Total Chunk Time: Less than .001 seconds\n", 309 | "Rows Read: 500, Total Rows Processed: 3500, Total Chunk Time: 0.001 seconds\n", 310 | "Rows Read: 500, Total Rows Processed: 4000, Total Chunk Time: 0.002 seconds\n", 311 | "Rows Read: 500, Total Rows Processed: 4500, Total Chunk Time: 0.001 seconds\n", 312 | "Rows Read: 500, Total Rows Processed: 5000, Total Chunk Time: Less than .001 seconds\n", 313 | "Rows Read: 500, Total Rows Processed: 5500, Total Chunk Time: 0.002 seconds\n", 314 | "Rows Read: 500, Total Rows Processed: 6000, Total Chunk Time: Less than .001 seconds\n", 315 | "Rows Read: 500, Total Rows Processed: 6500, Total Chunk Time: 0.001 seconds\n", 316 | "Rows Read: 500, Total Rows Processed: 7000, Total Chunk Time: 0.001 seconds\n", 317 | "Rows Read: 500, Total Rows Processed: 7500, Total Chunk Time: Less than .001 seconds\n", 318 | "Rows Read: 500, Total Rows Processed: 8000, Total Chunk Time: 0.002 seconds\n", 319 | "Rows Read: 500, Total Rows Processed: 8500, Total Chunk Time: Less than .001 seconds\n", 320 | "Rows Read: 500, Total Rows Processed: 9000, Total Chunk Time: 0.001 seconds\n", 321 | "Rows Read: 500, Total Rows Processed: 9500, Total Chunk Time: 0.001 seconds\n", 322 | "Rows Read: 500, Total Rows Processed: 10000, Total Chunk Time: Less than .001 seconds\n", 323 | "Rows Read: 500, Total Rows Processed: 10500, Total Chunk Time: 0.002 seconds\n", 324 | "Rows Read: 500, Total Rows Processed: 11000, Total Chunk Time: Less than .001 seconds\n", 325 | "Rows Read: 500, Total Rows Processed: 11500, Total Chunk Time: 0.001 seconds\n", 326 | "Rows Read: 500, Total Rows Processed: 12000, Total Chunk Time: 0.001 seconds\n", 327 | "Rows Read: 500, Total Rows Processed: 12500, Total Chunk Time: Less than .001 seconds\n", 328 | "Rows Read: 500, Total Rows Processed: 13000, Total Chunk Time: 0.001 
seconds\n", 329 | "Rows Read: 500, Total Rows Processed: 13500, Total Chunk Time: 0.001 seconds\n", 330 | "Rows Read: 500, Total Rows Processed: 14000, Total Chunk Time: Less than .001 seconds\n", 331 | "Rows Read: 500, Total Rows Processed: 14500, Total Chunk Time: 0.001 seconds\n", 332 | "Rows Read: 500, Total Rows Processed: 15000, Total Chunk Time: 0.001 seconds\n", 333 | "Rows Read: 500, Total Rows Processed: 15500, Total Chunk Time: Less than .001 seconds\n", 334 | "Rows Read: 500, Total Rows Processed: 16000, Total Chunk Time: 0.002 seconds\n", 335 | "Rows Read: 500, Total Rows Processed: 16500, Total Chunk Time: Less than .001 seconds\n", 336 | "Rows Read: 500, Total Rows Processed: 17000, Total Chunk Time: 0.001 seconds\n", 337 | "Rows Read: 500, Total Rows Processed: 17500, Total Chunk Time: Less than .001 seconds\n", 338 | "Rows Read: 500, Total Rows Processed: 18000, Total Chunk Time: 0.001 seconds\n", 339 | "Rows Read: 500, Total Rows Processed: 18500, Total Chunk Time: 0.001 seconds\n", 340 | "Rows Read: 500, Total Rows Processed: 19000, Total Chunk Time: Less than .001 seconds\n", 341 | "Rows Read: 500, Total Rows Processed: 19500, Total Chunk Time: 0.001 seconds\n", 342 | "Rows Read: 500, Total Rows Processed: 20000, Total Chunk Time: 0.001 seconds\n", 343 | "Rows Read: 500, Total Rows Processed: 20500, Total Chunk Time: Less than .001 seconds\n", 344 | "Rows Read: 500, Total Rows Processed: 21000, Total Chunk Time: 0.001 seconds\n", 345 | "Rows Read: 500, Total Rows Processed: 21500, Total Chunk Time: 0.002 seconds\n", 346 | "Rows Read: 500, Total Rows Processed: 22000, Total Chunk Time: 0.001 seconds\n", 347 | "Rows Read: 500, Total Rows Processed: 22500, Total Chunk Time: 0.001 seconds\n", 348 | "Rows Read: 500, Total Rows Processed: 23000, Total Chunk Time: Less than .001 seconds\n", 349 | "Rows Read: 500, Total Rows Processed: 23500, Total Chunk Time: 0.001 seconds\n", 350 | "Rows Read: 500, Total Rows Processed: 24000, Total Chunk Time: 0.001 
seconds\n", 351 | "Rows Read: 500, Total Rows Processed: 24500, Total Chunk Time: Less than .001 seconds\n", 352 | "Rows Read: 500, Total Rows Processed: 25000, Total Chunk Time: 0.001 seconds\n", 353 | "Rows Read: 500, Total Rows Processed: 25500, Total Chunk Time: 0.002 seconds\n", 354 | "Rows Read: 500, Total Rows Processed: 26000, Total Chunk Time: 0.001 seconds\n", 355 | "Rows Read: 500, Total Rows Processed: 26500, Total Chunk Time: 0.001 seconds\n", 356 | "Rows Read: 500, Total Rows Processed: 27000, Total Chunk Time: Less than .001 seconds\n", 357 | "Rows Read: 500, Total Rows Processed: 27500, Total Chunk Time: 0.001 seconds\n", 358 | "Rows Read: 500, Total Rows Processed: 28000, Total Chunk Time: 0.001 seconds\n", 359 | "Rows Read: 500, Total Rows Processed: 28500, Total Chunk Time: Less than .001 seconds\n", 360 | "Rows Read: 500, Total Rows Processed: 29000, Total Chunk Time: 0.001 seconds\n", 361 | "Rows Read: 500, Total Rows Processed: 29500, Total Chunk Time: 0.001 seconds\n", 362 | "Rows Read: 500, Total Rows Processed: 30000, Total Chunk Time: Less than .001 seconds\n", 363 | "Rows Read: 500, Total Rows Processed: 30500, Total Chunk Time: 0.001 seconds\n", 364 | "Rows Read: 500, Total Rows Processed: 31000, Total Chunk Time: 0.002 seconds\n", 365 | "Rows Read: 500, Total Rows Processed: 31500, Total Chunk Time: 0.001 seconds\n", 366 | "Rows Read: 500, Total Rows Processed: 32000, Total Chunk Time: Less than .001 seconds\n", 367 | "Rows Read: 500, Total Rows Processed: 32500, Total Chunk Time: 0.001 seconds\n", 368 | "Rows Read: 500, Total Rows Processed: 33000, Total Chunk Time: 0.001 seconds\n", 369 | "Rows Read: 500, Total Rows Processed: 33500, Total Chunk Time: Less than .001 seconds\n", 370 | "Rows Read: 500, Total Rows Processed: 34000, Total Chunk Time: 0.002 seconds\n", 371 | "Rows Read: 500, Total Rows Processed: 34500, Total Chunk Time: Less than .001 seconds\n", 372 | "Rows Read: 500, Total Rows Processed: 35000, Total Chunk Time: 0.001 
seconds\n", 373 | "Rows Read: 500, Total Rows Processed: 35500, Total Chunk Time: 0.001 seconds\n", 374 | "Rows Read: 500, Total Rows Processed: 36000, Total Chunk Time: Less than .001 seconds\n", 375 | "Rows Read: 500, Total Rows Processed: 36500, Total Chunk Time: 0.001 seconds\n", 376 | "Rows Read: 500, Total Rows Processed: 37000, Total Chunk Time: 0.001 seconds\n", 377 | "Rows Read: 500, Total Rows Processed: 37500, Total Chunk Time: Less than .001 seconds\n", 378 | "Rows Read: 500, Total Rows Processed: 38000, Total Chunk Time: 0.001 seconds\n", 379 | "Rows Read: 500, Total Rows Processed: 38500, Total Chunk Time: 0.002 seconds\n", 380 | "Rows Read: 500, Total Rows Processed: 39000, Total Chunk Time: 0.001 seconds\n", 381 | "Rows Read: 500, Total Rows Processed: 39500, Total Chunk Time: 0.001 seconds\n", 382 | "Rows Read: 500, Total Rows Processed: 40000, Total Chunk Time: Less than .001 seconds\n", 383 | "Rows Read: 500, Total Rows Processed: 40500, Total Chunk Time: 0.001 seconds\n", 384 | "Rows Read: 500, Total Rows Processed: 41000, Total Chunk Time: 0.001 seconds\n", 385 | "Rows Read: 500, Total Rows Processed: 41500, Total Chunk Time: Less than .001 seconds\n", 386 | "Rows Read: 500, Total Rows Processed: 42000, Total Chunk Time: 0.001 seconds\n", 387 | "Rows Read: 500, Total Rows Processed: 42500, Total Chunk Time: 0.001 seconds\n", 388 | "Rows Read: 500, Total Rows Processed: 43000, Total Chunk Time: 0.002 seconds\n", 389 | "Rows Read: 500, Total Rows Processed: 43500, Total Chunk Time: 0.001 seconds\n", 390 | "Rows Read: 500, Total Rows Processed: 44000, Total Chunk Time: Less than .001 seconds\n", 391 | "Rows Read: 500, Total Rows Processed: 44500, Total Chunk Time: 0.001 seconds\n", 392 | "Rows Read: 500, Total Rows Processed: 45000, Total Chunk Time: 0.001 seconds\n", 393 | "Rows Read: 500, Total Rows Processed: 45500, Total Chunk Time: Less than .001 seconds\n", 394 | "Rows Read: 500, Total Rows Processed: 46000, Total Chunk Time: 0.001 
seconds\n", 395 | "Rows Read: 500, Total Rows Processed: 46500, Total Chunk Time: Less than .001 seconds\n", 396 | "Rows Read: 500, Total Rows Processed: 47000, Total Chunk Time: 0.001 seconds\n", 397 | "Rows Read: 500, Total Rows Processed: 47500, Total Chunk Time: 0.001 seconds\n", 398 | "Rows Read: 500, Total Rows Processed: 48000, Total Chunk Time: Less than .001 seconds\n", 399 | "Rows Read: 500, Total Rows Processed: 48500, Total Chunk Time: 0.001 seconds\n", 400 | "Rows Read: 500, Total Rows Processed: 49000, Total Chunk Time: 0.001 seconds\n", 401 | "Rows Read: 500, Total Rows Processed: 49500, Total Chunk Time: Less than .001 seconds\n", 402 | "Rows Read: 500, Total Rows Processed: 50000, Total Chunk Time: 0.001 seconds\n", 403 | "Rows Read: 500, Total Rows Processed: 50500, Total Chunk Time: 0.001 seconds\n", 404 | "Rows Read: 500, Total Rows Processed: 51000, Total Chunk Time: Less than .001 seconds\n", 405 | "Rows Read: 500, Total Rows Processed: 51500, Total Chunk Time: 0.002 seconds\n", 406 | "Rows Read: 500, Total Rows Processed: 52000, Total Chunk Time: Less than .001 seconds\n", 407 | "Rows Read: 500, Total Rows Processed: 52500, Total Chunk Time: 0.001 seconds\n", 408 | "Rows Read: 500, Total Rows Processed: 53000, Total Chunk Time: 0.001 seconds\n", 409 | "Rows Read: 500, Total Rows Processed: 53500, Total Chunk Time: Less than .001 seconds\n", 410 | "Rows Read: 500, Total Rows Processed: 54000, Total Chunk Time: 0.001 seconds\n", 411 | "Rows Read: 500, Total Rows Processed: 54500, Total Chunk Time: 0.001 seconds\n", 412 | "Rows Read: 500, Total Rows Processed: 55000, Total Chunk Time: Less than .001 seconds\n", 413 | "Rows Read: 500, Total Rows Processed: 55500, Total Chunk Time: 0.001 seconds\n", 414 | "Rows Read: 500, Total Rows Processed: 56000, Total Chunk Time: 0.001 seconds\n", 415 | "Rows Read: 500, Total Rows Processed: 56500, Total Chunk Time: Less than .001 seconds\n", 416 | "Rows Read: 500, Total Rows Processed: 57000, Total Chunk 
Time: 0.001 seconds\n", 417 | "Rows Read: 500, Total Rows Processed: 57500, Total Chunk Time: 0.001 seconds\n", 418 | "Rows Read: 500, Total Rows Processed: 58000, Total Chunk Time: Less than .001 seconds\n", 419 | "Rows Read: 500, Total Rows Processed: 58500, Total Chunk Time: 0.001 seconds\n", 420 | "Rows Read: 500, Total Rows Processed: 59000, Total Chunk Time: 0.001 seconds\n", 421 | "Rows Read: 500, Total Rows Processed: 59500, Total Chunk Time: 0.002 seconds\n", 422 | "Rows Read: 500, Total Rows Processed: 60000, Total Chunk Time: Less than .001 seconds\n", 423 | "Rows Read: 500, Total Rows Processed: 60500, Total Chunk Time: 0.001 seconds\n", 424 | "Rows Read: 500, Total Rows Processed: 61000, Total Chunk Time: 0.001 seconds\n", 425 | "Rows Read: 500, Total Rows Processed: 61500, Total Chunk Time: Less than .001 seconds\n", 426 | "Rows Read: 500, Total Rows Processed: 62000, Total Chunk Time: 0.001 seconds\n", 427 | "Rows Read: 500, Total Rows Processed: 62500, Total Chunk Time: 0.001 seconds\n", 428 | "Rows Read: 500, Total Rows Processed: 63000, Total Chunk Time: Less than .001 seconds\n", 429 | "Rows Read: 500, Total Rows Processed: 63500, Total Chunk Time: 0.001 seconds\n", 430 | "Rows Read: 500, Total Rows Processed: 64000, Total Chunk Time: 0.001 seconds\n", 431 | "Rows Read: 500, Total Rows Processed: 64500, Total Chunk Time: Less than .001 seconds\n", 432 | "Rows Read: 500, Total Rows Processed: 65000, Total Chunk Time: 0.001 seconds\n", 433 | "Rows Read: 500, Total Rows Processed: 65500, Total Chunk Time: 0.001 seconds\n", 434 | "Rows Read: 500, Total Rows Processed: 66000, Total Chunk Time: Less than .001 seconds\n", 435 | "Rows Read: 500, Total Rows Processed: 66500, Total Chunk Time: 0.001 seconds\n", 436 | "Rows Read: 500, Total Rows Processed: 67000, Total Chunk Time: 0.002 seconds\n", 437 | "Rows Read: 500, Total Rows Processed: 67500, Total Chunk Time: 0.001 seconds\n", 438 | "Rows Read: 500, Total Rows Processed: 68000, Total Chunk Time: 
0.001 seconds\n", 439 | "Rows Read: 500, Total Rows Processed: 68500, Total Chunk Time: Less than .001 seconds\n", 440 | "Rows Read: 500, Total Rows Processed: 69000, Total Chunk Time: 0.001 seconds\n", 441 | "Rows Read: 500, Total Rows Processed: 69500, Total Chunk Time: 0.001 seconds\n", 442 | "Rows Read: 500, Total Rows Processed: 70000, Total Chunk Time: Less than .001 seconds\n", 443 | "Rows Read: 500, Total Rows Processed: 70500, Total Chunk Time: 0.001 seconds\n", 444 | "Rows Read: 500, Total Rows Processed: 71000, Total Chunk Time: 0.001 seconds\n", 445 | "Rows Read: 500, Total Rows Processed: 71500, Total Chunk Time: Less than .001 seconds\n", 446 | "Rows Read: 500, Total Rows Processed: 72000, Total Chunk Time: 0.001 seconds\n", 447 | "Rows Read: 500, Total Rows Processed: 72500, Total Chunk Time: 0.001 seconds\n", 448 | "Rows Read: 500, Total Rows Processed: 73000, Total Chunk Time: 0.002 seconds\n", 449 | "Rows Read: 500, Total Rows Processed: 73500, Total Chunk Time: 0.001 seconds\n", 450 | "Rows Read: 500, Total Rows Processed: 74000, Total Chunk Time: Less than .001 seconds\n", 451 | "Rows Read: 500, Total Rows Processed: 74500, Total Chunk Time: 0.001 seconds\n", 452 | "Rows Read: 500, Total Rows Processed: 75000, Total Chunk Time: Less than .001 seconds\n", 453 | "Rows Read: 500, Total Rows Processed: 75500, Total Chunk Time: 0.001 seconds\n", 454 | "Rows Read: 500, Total Rows Processed: 76000, Total Chunk Time: 0.001 seconds\n", 455 | "Rows Read: 500, Total Rows Processed: 76500, Total Chunk Time: Less than .001 seconds\n", 456 | "Rows Read: 500, Total Rows Processed: 77000, Total Chunk Time: 0.001 seconds\n", 457 | "Rows Read: 500, Total Rows Processed: 77500, Total Chunk Time: 0.001 seconds\n", 458 | "Rows Read: 500, Total Rows Processed: 78000, Total Chunk Time: Less than .001 seconds\n", 459 | "Rows Read: 500, Total Rows Processed: 78500, Total Chunk Time: 0.002 seconds\n", 460 | "Rows Read: 500, Total Rows Processed: 79000, Total Chunk Time: 
Less than .001 seconds\n", 461 | "Rows Read: 500, Total Rows Processed: 79500, Total Chunk Time: 0.001 seconds\n", 462 | "Rows Read: 500, Total Rows Processed: 80000, Total Chunk Time: 0.001 seconds\n", 463 | "Rows Read: 500, Total Rows Processed: 80500, Total Chunk Time: Less than .001 seconds\n", 464 | "Rows Read: 500, Total Rows Processed: 81000, Total Chunk Time: 0.002 seconds\n", 465 | "Rows Read: 500, Total Rows Processed: 81500, Total Chunk Time: Less than .001 seconds\n", 466 | "Rows Read: 500, Total Rows Processed: 82000, Total Chunk Time: 0.001 seconds\n", 467 | "Rows Read: 500, Total Rows Processed: 82500, Total Chunk Time: 0.001 seconds\n", 468 | "Rows Read: 500, Total Rows Processed: 83000, Total Chunk Time: Less than .001 seconds\n", 469 | "Rows Read: 500, Total Rows Processed: 83500, Total Chunk Time: 0.001 seconds\n", 470 | "Rows Read: 500, Total Rows Processed: 84000, Total Chunk Time: 0.001 seconds\n", 471 | "Rows Read: 500, Total Rows Processed: 84500, Total Chunk Time: Less than .001 seconds\n", 472 | "Rows Read: 500, Total Rows Processed: 85000, Total Chunk Time: 0.001 seconds\n", 473 | "Rows Read: 500, Total Rows Processed: 85500, Total Chunk Time: 0.001 seconds\n", 474 | "Rows Read: 500, Total Rows Processed: 86000, Total Chunk Time: 0.002 seconds\n", 475 | "Rows Read: 500, Total Rows Processed: 86500, Total Chunk Time: 0.001 seconds\n", 476 | "Rows Read: 500, Total Rows Processed: 87000, Total Chunk Time: Less than .001 seconds\n", 477 | "Rows Read: 500, Total Rows Processed: 87500, Total Chunk Time: 0.001 seconds\n", 478 | "Rows Read: 500, Total Rows Processed: 88000, Total Chunk Time: 0.001 seconds\n", 479 | "Rows Read: 500, Total Rows Processed: 88500, Total Chunk Time: Less than .001 seconds\n", 480 | "Rows Read: 500, Total Rows Processed: 89000, Total Chunk Time: 0.001 seconds\n", 481 | "Rows Read: 500, Total Rows Processed: 89500, Total Chunk Time: Less than .001 seconds\n", 482 | "Rows Read: 500, Total Rows Processed: 90000, Total 
Chunk Time: 0.001 seconds\n", 483 | "Rows Read: 500, Total Rows Processed: 90500, Total Chunk Time: 0.001 seconds\n", 484 | "Rows Read: 500, Total Rows Processed: 91000, Total Chunk Time: 0.002 seconds\n", 485 | "Rows Read: 500, Total Rows Processed: 91500, Total Chunk Time: 0.001 seconds\n", 486 | "Rows Read: 500, Total Rows Processed: 92000, Total Chunk Time: Less than .001 seconds\n", 487 | "Rows Read: 500, Total Rows Processed: 92500, Total Chunk Time: 0.001 seconds\n", 488 | "Rows Read: 500, Total Rows Processed: 93000, Total Chunk Time: 0.001 seconds\n", 489 | "Rows Read: 500, Total Rows Processed: 93500, Total Chunk Time: Less than .001 seconds\n", 490 | "Rows Read: 500, Total Rows Processed: 94000, Total Chunk Time: 0.001 seconds\n", 491 | "Rows Read: 500, Total Rows Processed: 94500, Total Chunk Time: 0.002 seconds\n", 492 | "Rows Read: 500, Total Rows Processed: 95000, Total Chunk Time: 0.001 seconds\n", 493 | "Rows Read: 500, Total Rows Processed: 95500, Total Chunk Time: 0.001 seconds\n", 494 | "Rows Read: 500, Total Rows Processed: 96000, Total Chunk Time: Less than .001 seconds\n", 495 | "Rows Read: 500, Total Rows Processed: 96500, Total Chunk Time: 0.001 seconds\n", 496 | "Rows Read: 500, Total Rows Processed: 97000, Total Chunk Time: 0.001 seconds\n", 497 | "Rows Read: 500, Total Rows Processed: 97500, Total Chunk Time: Less than .001 seconds\n", 498 | "Rows Read: 500, Total Rows Processed: 98000, Total Chunk Time: 0.001 seconds\n", 499 | "Rows Read: 500, Total Rows Processed: 98500, Total Chunk Time: 0.002 seconds\n", 500 | "Rows Read: 500, Total Rows Processed: 99000, Total Chunk Time: 0.001 seconds\n", 501 | "Rows Read: 500, Total Rows Processed: 99500, Total Chunk Time: 0.001 seconds\n", 502 | "Rows Read: 85, Total Rows Processed: 99585, Total Chunk Time: Less than .001 seconds \n" 503 | ] 504 | } 505 | ], 506 | "source": [ 507 | "# Ultimately, we will execute an R script inside of SQL/Server since there is a resource limit on how large a 
dataframe we can\n", 508 | "# load into memory. The rxImport step without an outFile argument produces a dataframe. With the outFile argument it produces\n", 509 | "# an XDF file. \n", 510 | "\n", 511 | "df1 <-rxImport(inDataSource)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": { 518 | "collapsed": true 519 | }, 520 | "outputs": [], 521 | "source": [] 522 | } 523 | ], 524 | "metadata": { 525 | "kernelspec": { 526 | "display_name": "R 3.3", 527 | "language": "R", 528 | "name": "ir33" 529 | }, 530 | "language_info": { 531 | "codemirror_mode": "r", 532 | "file_extension": ".r", 533 | "mimetype": "text/x-r-source", 534 | "name": "R", 535 | "pygments_lexer": "r", 536 | "version": "3.3.2" 537 | } 538 | }, 539 | "nbformat": 4, 540 | "nbformat_minor": 2 541 | } 542 | --------------------------------------------------------------------------------