├── Python ├── MANIFEST.in ├── setup.cfg ├── tests │ ├── scripts │ │ ├── test_packages │ │ │ ├── testpackageA │ │ │ │ ├── testpackageA │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ClassA.py │ │ │ │ ├── MANIFEST │ │ │ │ ├── dist │ │ │ │ │ └── testpackageA-0.0.1.zip │ │ │ │ └── setup.py │ │ │ ├── termcolor-1.1.0.tar.gz │ │ │ ├── testpackageA-0.0.1.zip │ │ │ ├── testpackageA-0.0.2.zip │ │ │ ├── astor-0.8.1-py2.py3-none-any.whl │ │ │ └── html5lib-1.1-py2.py3-none-any.whl │ │ ├── exec_script_no_out_params.py │ │ ├── exec_script.py │ │ ├── exec_script_no_params.py │ │ ├── exec_script_sproc_out_df.py │ │ └── exec_script_out_param.py │ ├── package_helper_functions.py │ ├── conftest.py │ ├── execute_function_test.py │ ├── package_management_pypi_test.py │ └── package_management_file_test.py ├── requirements.txt ├── buildandinstall.sh ├── sqlmlutils │ ├── packagemanagement │ │ ├── __init__.py │ │ ├── scope.py │ │ ├── messages.py │ │ ├── pkgutils.py │ │ ├── servermethods.py │ │ ├── download_script.py │ │ ├── dependencyresolver.py │ │ ├── pipdownloader.py │ │ ├── packagesqlbuilder.py │ │ └── sqlpackagemanager.py │ ├── __init__.py │ ├── connectioninfo.py │ ├── sqlqueryexecutor.py │ └── sqlpythonexecutor.py ├── buildandinstall.cmd ├── samples │ ├── sample_simple_function_test.py │ ├── sample_linear_regression_test.py │ ├── sample_scatter_plot_test.py │ └── sample_stored_procedure.py ├── MANIFEST ├── setup.py ├── LICENSE.txt └── README.md ├── AirlineTestDB.bak ├── R ├── .Rbuildignore ├── tests │ ├── testthat │ │ ├── scripts │ │ │ ├── script3.R │ │ │ ├── script2.txt │ │ │ └── script.txt │ │ ├── test.checkLogins.R │ │ ├── test.sqlPackage.unit.R │ │ ├── test.sqlPackage.fileNameParse.unit.R │ │ ├── helper-Setup.R │ │ ├── test.sqlPackage.basic.R │ │ ├── test.sqlPackage.createExternalLibrary.R │ │ ├── test.sqlPackage.scope.R │ │ ├── test.sqlPackage.toplevel.R │ │ ├── helper-sqlPackage.R │ │ └── test.executeInSqlTests.R │ ├── testthat-executeInSql.R │ ├── testthat-sqlPackage-base.R │ ├── 
testthat-Sproc.R │ ├── testthat-sqlPackage-scope.R │ ├── testthat-checkLogins.R │ ├── testthat-sqlPackage-unit.R │ ├── testthat-sqlPackage-dependencies.R │ ├── testthat-sqlPackage-external.R │ └── testthat-sqlPackage-toplevel.R ├── dist │ ├── sqlmlutils_0.7.1.zip │ └── sqlmlutils_0.7.1.tar.gz ├── buildandinstall.cmd ├── NAMESPACE ├── sqlmlutils.Rproj ├── DESCRIPTION ├── man │ ├── executeSQLQuery.Rd │ ├── executeScriptInSQL.Rd │ ├── dropSproc.Rd │ ├── connectionInfo.Rd │ ├── checkSproc.Rd │ ├── executeFunctionInSQL.Rd │ ├── executeSproc.Rd │ ├── sqlmlutils-package.Rd │ ├── sql_installed.packages.Rd │ ├── sql_remove.packages.Rd │ ├── sql_install.packages.Rd │ └── createSprocFromFunction.Rd ├── LICENSE ├── R │ ├── sqlmlutils.R │ └── storedProcedureScripting.R └── README.md ├── CODEOWNERS ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── SECURITY.md ├── README.md └── .github └── workflows ├── ci.yaml ├── SQL2019.yaml └── SQL2022.yml /Python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt -------------------------------------------------------------------------------- /Python/setup.cfg: -------------------------------------------------------------------------------- 1 | # Inside of setup.cfg 2 | [metadata] 3 | description-file = README.md -------------------------------------------------------------------------------- /AirlineTestDB.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/AirlineTestDB.bak -------------------------------------------------------------------------------- /R/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^dist$ 4 | ^buildandinstall.cmd 5 | -------------------------------------------------------------------------------- 
/Python/tests/scripts/test_packages/testpackageA/testpackageA/__init__.py: -------------------------------------------------------------------------------- 1 | from .ClassA import ClassA -------------------------------------------------------------------------------- /R/tests/testthat/scripts/script3.R: -------------------------------------------------------------------------------- 1 | product <- num1 * num2 2 | out_df <- rbind(in_df, product) 3 | -------------------------------------------------------------------------------- /R/dist/sqlmlutils_0.7.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/R/dist/sqlmlutils_0.7.1.zip -------------------------------------------------------------------------------- /R/dist/sqlmlutils_0.7.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/R/dist/sqlmlutils_0.7.1.tar.gz -------------------------------------------------------------------------------- /R/tests/testthat/scripts/script2.txt: -------------------------------------------------------------------------------- 1 | 2 | sum1 <- 1+2 3 | sum2 <- 5+6 4 | product <- sum1 * sum2 5 | product 6 | -------------------------------------------------------------------------------- /Python/requirements.txt: -------------------------------------------------------------------------------- 1 | pip>=9.0.1 2 | pyodbc>=4.0.25 3 | dill>=0.2.6 4 | pkginfo>=1.4.2 5 | requirements-parser>=0.2.0 6 | pandas>=0.19.2 7 | wheel>=0.32.3,<0.35.0 8 | -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/termcolor-1.1.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/Python/tests/scripts/test_packages/termcolor-1.1.0.tar.gz 
-------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/testpackageA-0.0.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/Python/tests/scripts/test_packages/testpackageA-0.0.1.zip -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/testpackageA-0.0.2.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/Python/tests/scripts/test_packages/testpackageA-0.0.2.zip -------------------------------------------------------------------------------- /Python/buildandinstall.sh: -------------------------------------------------------------------------------- 1 | rm -f dist/* 2 | python setup.py sdist --formats=zip 3 | python -m pip install --upgrade --upgrade-strategy only-if-needed --find-links=dist sqlmlutils -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/testpackageA/MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.py 3 | testpackageA\ClassA.py 4 | testpackageA\__init__.py 5 | -------------------------------------------------------------------------------- /R/buildandinstall.cmd: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 3 | R -e "if (!require('odbc')) install.packages('odbc')" 4 | R CMD INSTALL --build R 5 | mv sqlmlutils_*.zip R/dist 6 | popd 7 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | from .sqlpackagemanager import SQLPackageManager -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/astor-0.8.1-py2.py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/Python/tests/scripts/test_packages/astor-0.8.1-py2.py3-none-any.whl -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/html5lib-1.1-py2.py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/Python/tests/scripts/test_packages/html5lib-1.1-py2.py3-none-any.whl -------------------------------------------------------------------------------- /R/tests/testthat/scripts/script.txt: -------------------------------------------------------------------------------- 1 | foo <- function(t1, t2, t3) 2 | { 3 | print(t1) 4 | warning(t2) 5 | return(t3) 6 | } 7 | 8 | foo("Hello","WARNING", InputDataSet) 9 | -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/testpackageA/dist/testpackageA-0.0.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/sqlmlutils/HEAD/Python/tests/scripts/test_packages/testpackageA/dist/testpackageA-0.0.1.zip -------------------------------------------------------------------------------- /Python/tests/scripts/exec_script_no_out_params.py: -------------------------------------------------------------------------------- 1 | def foo(t1, t2, t3): 2 | print(t1 + t2) 3 | print(t3) 4 | return t3 5 | 6 | 7 | res = foo(t1,t2,t3) 8 | 9 | print("Testing output!") 10 | -------------------------------------------------------------------------------- /Python/tests/scripts/exec_script.py: 
-------------------------------------------------------------------------------- 1 | def foo(t1, t2, t3): 2 | print(t1 + t2) 3 | print(t3) 4 | return t3 5 | 6 | 7 | res = foo("Hello","World",InputDataSet) 8 | 9 | print("Testing output!") 10 | -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/testpackageA/testpackageA/ClassA.py: -------------------------------------------------------------------------------- 1 | class ClassA: 2 | 3 | def __init__(self, val): 4 | self._val = val 5 | 6 | @property 7 | def val(self): 8 | return self._val -------------------------------------------------------------------------------- /R/tests/testthat-executeInSql.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "execute") -------------------------------------------------------------------------------- /R/tests/testthat-sqlPackage-base.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "basic") -------------------------------------------------------------------------------- /Python/tests/scripts/exec_script_no_params.py: -------------------------------------------------------------------------------- 1 | def foo(t1, t2, t3): 2 | print(t1 + t2) 3 | print(t3) 4 | return t3 5 | 6 | 7 | res = foo("No ", "Inputs", "Required") 8 | 9 | print("Testing output!") 10 | -------------------------------------------------------------------------------- /R/tests/testthat-Sproc.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "storedProcedure") 8 | -------------------------------------------------------------------------------- /R/tests/testthat-sqlPackage-scope.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "scope") -------------------------------------------------------------------------------- /R/tests/testthat-checkLogins.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "Logins") 8 | -------------------------------------------------------------------------------- /R/tests/testthat-sqlPackage-unit.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "unit") 8 | 9 | -------------------------------------------------------------------------------- /Python/tests/scripts/exec_script_sproc_out_df.py: -------------------------------------------------------------------------------- 1 | def foo(t1, t2, t3): 2 | print(t1) 3 | print(t2) 4 | print(t3) 5 | return t3 6 | 7 | 8 | OutputDataSet = foo(t1,t2,t3) 9 | 10 | print("Testing output!") 11 | -------------------------------------------------------------------------------- /R/tests/testthat-sqlPackage-dependencies.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "dependencies") -------------------------------------------------------------------------------- /R/tests/testthat-sqlPackage-external.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "createExternal") -------------------------------------------------------------------------------- /R/tests/testthat-sqlPackage-toplevel.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | library(testthat) 5 | library(sqlmlutils) 6 | 7 | test_check("sqlmlutils", filter = "toplevel") 8 | -------------------------------------------------------------------------------- /Python/buildandinstall.cmd: -------------------------------------------------------------------------------- 1 | del /q dist\* 2 | python.exe setup.py sdist --formats=zip 3 | python.exe setup.py bdist_wheel 4 | pushd dist 5 | python.exe -m pip install --upgrade --upgrade-strategy only-if-needed --find-links=. sqlmlutils 6 | popd 7 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo. Unless a later match takes precedence, 3 | # review when someone opens a pull request. 4 | * @Aniruddh25 @seantleonard @monamaki @aaronburtle @beccadaniel @SicongLiu2000 5 | -------------------------------------------------------------------------------- /Python/tests/scripts/test_packages/testpackageA/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup( 4 | name='testpackageA' , 5 | packages=['testpackageA'], 6 | version='0.0.1', 7 | description='Test package for python package management.', 8 | author='Microsoft' 9 | ) 10 | -------------------------------------------------------------------------------- /Python/sqlmlutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | from .connectioninfo import ConnectionInfo 5 | from .sqlpythonexecutor import SQLPythonExecutor 6 | from .packagemanagement.scope import Scope 7 | from .packagemanagement.sqlpackagemanager import SQLPackageManager -------------------------------------------------------------------------------- /Python/tests/scripts/exec_script_out_param.py: -------------------------------------------------------------------------------- 1 | def foo(t1, t2, t3): 2 | return str(t1)+str(t2) 3 | 4 | 5 | param_str = foo(t1,t2,t3) 6 | 7 | print("Testing output!") 8 | 9 | # The double single quotes below are need for SPEES execution test. 10 | data = {''Numbers'':[1, 2, 3, 4]} 11 | OutputDataSet = DataFrame(data) 12 | -------------------------------------------------------------------------------- /Python/samples/sample_simple_function_test.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | import sqlmlutils 5 | 6 | 7 | def foo(): 8 | return "bar" 9 | 10 | 11 | sqlpython = sqlmlutils.SQLPythonExecutor(sqlmlutils.ConnectionInfo(server="localhost", database="master")) 12 | result = sqlpython.execute_function_in_sql(foo) 13 | assert result == "bar" 14 | 15 | -------------------------------------------------------------------------------- /R/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(checkSproc) 4 | export(connectionInfo) 5 | export(createSprocFromFunction) 6 | export(createSprocFromScript) 7 | export(dropSproc) 8 | export(executeFunctionInSQL) 9 | export(executeSQLQuery) 10 | export(executeScriptInSQL) 11 | export(executeSproc) 12 | export(sql_install.packages) 13 | export(sql_installed.packages) 14 | export(sql_remove.packages) 15 | import(odbc) 16 | -------------------------------------------------------------------------------- /R/sqlmlutils.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/scope.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
class Scope:
    """Named package-management scope.

    A ``Scope`` wraps a scope name; two scopes are equal when their names
    are equal. Use :meth:`public_scope` and :meth:`private_scope` to obtain
    the two predefined scopes.
    """

    def __init__(self, name: str):
        self._name = name

    def __eq__(self, other):
        # Comparing against a foreign type must not blow up with an
        # AttributeError on the missing ``_name``; returning NotImplemented
        # lets Python fall back to its default (unequal) result.
        if not isinstance(other, Scope):
            return NotImplemented
        return self._name == other._name

    def __hash__(self):
        # A class that defines __eq__ without __hash__ becomes unhashable.
        # Hash on the same field equality uses so scopes work in sets/dicts.
        return hash(self._name)

    def __repr__(self):
        return "Scope({!r})".format(self._name)

    @staticmethod
    def public_scope():
        """Return the scope named ``"public"``."""
        return Scope("public")

    @staticmethod
    def private_scope():
        """Return the scope named ``"private"``."""
        return Scope("private")
14 | License: MIT + file LICENSE 15 | Copyright: Copyright 2016 Microsoft Corporation 16 | RoxygenNote: 7.1.2 17 | Encoding: UTF-8 18 | Suggests: testthat (>= 2.0.0), 19 | roxygen2 20 | -------------------------------------------------------------------------------- /Python/MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README.md 3 | setup.cfg 4 | setup.py 5 | sqlmlutils\__init__.py 6 | sqlmlutils\connectioninfo.py 7 | sqlmlutils\sqlbuilder.py 8 | sqlmlutils\sqlpythonexecutor.py 9 | sqlmlutils\sqlqueryexecutor.py 10 | sqlmlutils\storedprocedure.py 11 | sqlmlutils/packagemanagement\__init__.py 12 | sqlmlutils/packagemanagement\dependencyresolver.py 13 | sqlmlutils/packagemanagement\download_script.py 14 | sqlmlutils/packagemanagement\messages.py 15 | sqlmlutils/packagemanagement\outputcapture.py 16 | sqlmlutils/packagemanagement\packagesqlbuilder.py 17 | sqlmlutils/packagemanagement\pipdownloader.py 18 | sqlmlutils/packagemanagement\pkgutils.py 19 | sqlmlutils/packagemanagement\scope.py 20 | sqlmlutils/packagemanagement\servermethods.py 21 | sqlmlutils/packagemanagement\sqlpackagemanager.py 22 | -------------------------------------------------------------------------------- /Python/samples/sample_linear_regression_test.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | import sqlmlutils 5 | 6 | 7 | def linear_regression(input_df, x_col, y_col): 8 | from sklearn import linear_model 9 | 10 | X = input_df[[x_col]] 11 | y = input_df[y_col] 12 | 13 | lr = linear_model.LinearRegression() 14 | lr.fit(X, y) 15 | 16 | return lr 17 | 18 | 19 | sqlpy = sqlmlutils.SQLPythonExecutor(sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB")) 20 | sql_query = "select top 1000 CRSDepTime, CRSArrTime from airline5000" 21 | regression_model = sqlpy.execute_function_in_sql(linear_regression, input_data_query=sql_query, 22 | x_col="CRSDepTime", y_col="CRSArrTime") 23 | print(regression_model) 24 | print(regression_model.coef_) 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 36 | rsconnect/ 37 | .Rproj.user 38 | 39 | #Python Cache files 40 | __pycache__/ 41 | .cache/ 42 | .idea/ 43 | 44 | */build/lib/ 45 | Python/*.egg-info 46 | Python/dist/*.whl 47 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/messages.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft 
Corporation. 2 | # Licensed under the MIT license. 3 | 4 | def no_upgrade(pkgname: str, serverversion: str, pkgversion: str = ""): 5 | return """ 6 | Package {pkgname} exists on server. Set upgrade to True in install to force upgrade. 7 | The version of {pkgname} you are trying to install is {pkgversion}. 8 | The version installed on the server is {serverversion} 9 | """.format( 10 | pkgname=pkgname, 11 | pkgversion=pkgversion, 12 | serverversion=serverversion 13 | ) 14 | 15 | 16 | def install(pkgname: str, version: str, targetpackage: bool): 17 | target = "target package" if targetpackage else "required dependency" 18 | return "Installing {target} {pkgname} version {version}".format( 19 | target=target, 20 | pkgname=pkgname, 21 | version=version 22 | ) 23 | -------------------------------------------------------------------------------- /R/tests/testthat/test.checkLogins.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | library(testthat) 5 | context("Tests to check logins") 6 | 7 | test_that("Test DBO", 8 | { 9 | hodbc <- sqlmlutils:::connectToServer(helper_getSetting("connectionStringDBO")) 10 | 11 | expect_false(is.null(hodbc)) 12 | on.exit(dbDisconnect(hodbc), add = TRUE) 13 | }) 14 | 15 | test_that("Test AirlineUserdbowner", 16 | { 17 | hodbc <- sqlmlutils:::connectToServer(helper_getSetting("connectionStringAirlineUserdbowner")) 18 | 19 | expect_false(is.null(hodbc)) 20 | on.exit(dbDisconnect(hodbc), add = TRUE) 21 | }) 22 | 23 | test_that("Test AirlineUser", 24 | { 25 | hodbc <- sqlmlutils:::connectToServer(helper_getSetting("connectionStringAirlineUser")) 26 | 27 | expect_false(is.null(hodbc)) 28 | on.exit(dbDisconnect(hodbc), add = TRUE) 29 | }) 30 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/pkgutils.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
def _get_pkginfo(filename: str):
    """Return the pkginfo metadata object for *filename*, or ``None``.

    Files whose name ends in ``.whl`` are read as wheels; every other file
    is read as a source distribution. ``None`` is returned when the
    metadata cannot be read for any reason.
    """
    try:
        # Test the extension, not a substring: a path that merely contains
        # ".whl" (e.g. "my.whl.cache/pkg-1.0.zip") is not a wheel.
        if filename.lower().endswith(".whl"):
            return pkginfo.Wheel(filename)
        return pkginfo.SDist(filename)
    except Exception:
        # Deliberately best-effort: callers fall back to parsing the file
        # name when the archive metadata is unavailable.
        return None


def get_package_name_from_file(filename: str) -> str:
    """Return the package name for the archive at *filename*.

    The name recorded in the archive metadata is preferred. When it is
    not available, the name is derived from the file name by dropping the
    last extension and everything from the first ``-<digit>`` onwards
    (``termcolor-1.1.0.tar.gz`` -> ``termcolor``).
    """
    pkg = _get_pkginfo(filename)
    if pkg is not None and pkg.name is not None:
        return pkg.name
    name = os.path.splitext(os.path.basename(filename))[0]
    return re.sub(r"\-[0-9].*", "", name)


def get_package_version_from_file(filename: str):
    """Return the version recorded in the archive metadata, or ``None``.

    Unlike the package name, the version is never guessed from the file
    name.
    """
    pkg = _get_pkginfo(filename)
    if pkg is not None and pkg.version is not None:
        return pkg.version
    return None
# Packaging metadata for the sqlmlutils client package.
setup(
    name='sqlmlutils',
    # Package names are dotted module paths, not filesystem paths; the
    # sub-package must be listed as 'sqlmlutils.packagemanagement'.
    packages=['sqlmlutils', 'sqlmlutils.packagemanagement'],
    version='1.2.0',
    url='https://github.com/Microsoft/sqlmlutils/Python',
    license='MIT License',
    description='A client side package for working with SQL Server',
    long_description='A client side package for working with SQL Server Machine Learning Python Services. '
                     'sqlmlutils enables easy package installation and remote code execution on your SQL Server machine.',
    author='Microsoft',
    author_email='joz@microsoft.com',
    install_requires=[
        'pip',
        'pyodbc',
        'dill',
        'pkginfo',
        'requirements-parser',
        'pandas',
        # Upper bound kept as in the original requirements.
        'wheel<0.35.0'
    ],
    python_requires='>=3.5'
)
Use a language name other than the default R, if using an EXTERNAL LANGUAGE.} 22 | } 23 | \value{ 24 | The data frame returned by the query to the database 25 | } 26 | \description{ 27 | Execute a script in SQL 28 | } 29 | \examples{ 30 | \dontrun{ 31 | connection <- connectionInfo(database="AirlineTestDB") 32 | executeSQLQuery(connection, sqlQuery="SELECT top 1 * from airline5000") 33 | } 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/servermethods.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import re 6 | 7 | from sqlmlutils.packagemanagement.scope import Scope 8 | 9 | def show_installed_packages(): 10 | import pkg_resources 11 | return [(d.project_name, d.version) for d in pkg_resources.working_set] 12 | 13 | def get_server_info(): 14 | from distutils.version import LooseVersion 15 | import pip, sysconfig 16 | pipversion = LooseVersion(pip.__version__) 17 | 18 | if pipversion >= LooseVersion("19.3"): 19 | from wheel import pep425tags 20 | elif pipversion > LooseVersion("10"): 21 | from pip._internal import pep425tags 22 | else: 23 | from pip import pep425tags 24 | return { 25 | "impl_version_info": pep425tags.get_impl_version_info(), #(3,7) 26 | "abbr_impl": pep425tags.get_abbr_impl(), #'cp' 27 | "abi_tag": pep425tags.get_abi_tag(), #'cp37m' 28 | "platform": sysconfig.get_platform().replace("-","_") #'win_amd64', 'linux_x86_64' 29 | } 30 | -------------------------------------------------------------------------------- /Python/samples/sample_scatter_plot_test.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
import sqlmlutils
from PIL import Image


def scatter_plot(input_df, x_col, y_col):
    """Draw ``input_df[x_col]`` against ``input_df[y_col]`` and return the PNG.

    :param input_df: object indexable by column name (the query result).
    :param x_col: name of the column plotted on the x axis.
    :param y_col: name of the column plotted on the y axis.
    :return: an ``io.BytesIO`` holding the PNG image, rewound to position 0.
    """
    import io
    import matplotlib.pyplot as plt

    plt.scatter(input_df[x_col], input_df[y_col])
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.title(" vs. ".join((x_col, y_col)))

    # Render the figure into an in-memory PNG instead of a file on disk.
    png_buffer = io.BytesIO()
    plt.savefig(png_buffer, format="png")
    # Rewind so the receiver reads the image from its first byte.
    png_buffer.seek(0)
    return png_buffer


# Run the plotting function inside SQL Server against the first 100 rows.
sqlpy = sqlmlutils.SQLPythonExecutor(
    sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB")
)

sql_query = "select top 100 * from airline5000"
plot_data = sqlpy.execute_function_in_sql(
    func=scatter_plot,
    input_data_query=sql_query,
    x_col="ArrDelay",
    y_col="CRSDepTime",
)

# Display the returned PNG locally.
im = Image.open(plot_data)
im.show()
#im.save("scatter_test.png")
# Copyright(c) Microsoft Corporation.
# Licensed under the MIT license.

import os

from sqlmlutils import ConnectionInfo, Scope

# Connection settings come from the environment, each with a local default.
driver = os.environ.get('DRIVER', "SQL Server")
server = os.environ.get('SERVER', "localhost")
database = os.environ.get('DATABASE', "AirlineTestDB")
# NOTE(review): USER is commonly pre-set by Unix shells - confirm that picking
# it up as the SQL login is intended on those platforms.
uid = os.environ.get('USER', "")
pwd = os.environ.get('PASSWORD', "")

# Fixed login name for the second connection; only its password is configurable.
uidAirlineUser = "AirlineUserdbowner"
pwdAirlineUser = os.environ.get('PASSWORD_AIRLINE_USER', "FakeT3sterPwd!")

# Without an explicit uid the public scope is used, otherwise the private one.
if uid == "":
    scope = Scope.public_scope()
else:
    scope = Scope.private_scope()

connection = ConnectionInfo(
    driver=driver,
    server=server,
    database=database,
    uid=uid,
    pwd=pwd,
)

airline_user_connection = ConnectionInfo(
    driver=driver,
    server=server,
    database=database,
    uid=uidAirlineUser,
    pwd=pwdAirlineUser,
)
Return the tsql script that would be run on the server instead of running it} 15 | } 16 | \description{ 17 | Drop Stored Procedure 18 | } 19 | \examples{ 20 | \dontrun{ 21 | connectionString <- connectionInfo() 22 | 23 | dropSproc(connectionString, "fun") 24 | 25 | func <- function(arg1) {return(data.frame(hello = arg1))} 26 | createSprocFromFunction(connectionString, name = "fun", 27 | func = func, inputParams = list(arg1 = "character")) 28 | 29 | if (checkSproc(connectionString, "fun")) 30 | { 31 | print("Function 'fun' exists!") 32 | executeSproc(connectionString, "fun", arg1="WORLD") 33 | } 34 | } 35 | 36 | 37 | } 38 | \seealso{ 39 | { 40 | 41 | \code{\link{createSprocFromFunction}} 42 | 43 | \code{\link{executeSproc}} 44 | 45 | \code{\link{checkSproc}} 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /R/man/connectionInfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/executeInSQL.R 3 | \name{connectionInfo} 4 | \alias{connectionInfo} 5 | \title{Create a connection string from a set of parameters} 6 | \usage{ 7 | connectionInfo( 8 | driver = "SQL Server", 9 | server = "localhost", 10 | database = "master", 11 | uid = NULL, 12 | pwd = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{driver}{The driver to use for the connection - defaults to SQL Server} 17 | 18 | \item{server}{The server to connect to - defaults to localhost} 19 | 20 | \item{database}{The database to connect to - defaults to master} 21 | 22 | \item{uid}{The user id for the connection. If uid is NULL, default to Trusted Connection} 23 | 24 | \item{pwd}{The password for the connection.
# Copyright(c) Microsoft Corporation.
# Licensed under the MIT license.

# Downloads packages with pip while reporting caller-supplied interpreter tags.
#
# Expected command line arguments:
#   argv[1]  Python literal for the implementation version tuple, e.g. "(3, 7)"
#   argv[2]  abbreviated implementation, e.g. "cp"
#   argv[3]  ABI tag, e.g. "cp37m"
#   argv[4]  platform tag, e.g. "win_amd64"
#   argv[5]  comma separated argument list handed to pip's main()

import ast
import pip
import sys
import warnings

from distutils.version import LooseVersion

pipversion = LooseVersion(pip.__version__)

# pep425tags and pip's entry point live in different modules depending on
# the installed pip version.
if pipversion >= LooseVersion("19.3"):
    from wheel import pep425tags
    from pip._internal.main import main as pipmain
elif pipversion > LooseVersion("10"):
    from pip._internal import pep425tags
    from pip._internal import main as pipmain
else:
    if pipversion < LooseVersion("8.1.2"):
        warnings.warn("Pip version less than 8.1.2 not supported.", Warning)
    from pip import pep425tags
    from pip import main as pipmain

# Monkey patch the pip version information with server information
pep425tags.is_manylinux2010_compatible = lambda: True
pep425tags.is_manylinux1_compatible = lambda: True
# literal_eval only accepts Python literals, so a crafted argument cannot
# execute arbitrary code the way eval() would.
pep425tags.get_impl_version_info = lambda: ast.literal_eval(sys.argv[1])
pep425tags.get_abbr_impl = lambda: sys.argv[2]
pep425tags.get_abi_tag = lambda: sys.argv[3]
pep425tags.get_platform = lambda: sys.argv[4]

# Call pipmain with the download request
pipmain([argument.strip() for argument in sys.argv[5].split(",")])
-------------------------------------------------------------------------------- /R/man/checkSproc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/storedProcedure.R 3 | \name{checkSproc} 4 | \alias{checkSproc} 5 | \title{Check if Stored Procedure is in Database} 6 | \usage{ 7 | checkSproc(connectionString, name, getScript = FALSE) 8 | } 9 | \arguments{ 10 | \item{connectionString}{character string. The connectionString to the database} 11 | 12 | \item{name}{character string. The name of the stored procedure} 13 | 14 | \item{getScript}{boolean. Return the tsql script that would be run on the server instead of running it} 15 | } 16 | \value{ 17 | Whether the stored procedure exists in the database 18 | } 19 | \description{ 20 | Check if Stored Procedure is in Database 21 | } 22 | \examples{ 23 | \dontrun{ 24 | connectionString <- connectionInfo() 25 | 26 | dropSproc(connectionString, "fun") 27 | 28 | func <- function(arg1) {return(data.frame(hello = arg1))} 29 | createSprocFromFunction(connectionString, name = "fun", 30 | func = func, inputParams = list(arg1="character")) 31 | if (checkSproc(connectionString, "fun")) 32 | { 33 | print("Function 'fun' exists!") 34 | executeSproc(connectionString, "fun", arg1="WORLD") 35 | } 36 | } 37 | 38 | 39 | } 40 | \seealso{ 41 | { 42 | \code{\link{createSprocFromFunction}} 43 | 44 | \code{\link{dropSproc}} 45 | 46 | \code{\link{executeSproc}} 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /Python/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------- START OF LICENSE ----------------------------------------- 2 | sqlmlutils 3 | 4 | MIT License 5 | 6 | Copyright (c) Microsoft Corporation. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE 25 | ----------------------------------------------- END OF LICENSE ------------------------------------------ 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ------------------------------------------- START OF LICENSE ----------------------------------------- 2 | sqlmlutils 3 | 4 | MIT License 5 | 6 | Copyright (c) Microsoft Corporation. All rights reserved. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE 25 | ----------------------------------------------- END OF LICENSE ------------------------------------------ 26 | -------------------------------------------------------------------------------- /R/LICENSE: -------------------------------------------------------------------------------- 1 | ------------------------------------------- START OF LICENSE ----------------------------------------- 2 | sqlmlutils 3 | 4 | MIT License 5 | 6 | Copyright (c) Microsoft Corporation. All rights reserved. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE 25 | ----------------------------------------------- END OF LICENSE ------------------------------------------ 26 | -------------------------------------------------------------------------------- /R/R/sqlmlutils.R: -------------------------------------------------------------------------------- 1 | #' @section Executing in SQL: 2 | #' 3 | #' \itemize{ 4 | #' \item \code{\link{connectionInfo}}: Creates a connection string from a set of parameters 5 | #' 6 | #' \item \code{\link{executeScriptInSQL}}: Executes a script file inside SQL 7 | #' 8 | #' \item \code{\link{executeFunctionInSQL}}: Executes a user function inside SQL 9 | #' 10 | #' \item \code{\link{executeSQLQuery}}: Executes a SQL query and returns the resultant table 11 | #' } 12 | #' 13 | #' @section Stored Procedures: 14 | #' \itemize{ 15 | #' \item \code{\link{createSprocFromFunction}}: Creates a stored procedure from 
a custom R function 16 | #' 17 | #' \item \code{\link{createSprocFromScript}}: Creates a stored procedure from a custom R script file 18 | #' 19 | #' \item \code{\link{dropSproc}}: Drops a stored procedure from the database 20 | #' 21 | #' \item \code{\link{executeSproc}}: Executes a stored procedure that is already in the database 22 | #' 23 | #' \item \code{\link{checkSproc}}: Checks if a stored procedure is already in the database 24 | #' } 25 | #' 26 | #' @section Package Management: 27 | #' 28 | #' \itemize{ 29 | #' \item \code{\link{sql_install.packages}}: Installs packages on a SQL Server 30 | #' 31 | #' \item \code{\link{sql_remove.packages}}: Removes packages from a SQL Server 32 | #' 33 | #' \item \code{\link{sql_installed.packages}}: Enumerates the installed packages on a SQL Server 34 | #' } 35 | #' @keywords package 36 | "_PACKAGE" 37 | #> [1] "_PACKAGE" 38 | -------------------------------------------------------------------------------- /R/tests/testthat/test.sqlPackage.unit.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | library(sqlmlutils) 5 | library(testthat) 6 | 7 | context("Tests for sqlmlutils package management unit") 8 | 9 | 10 | test_that("checkOwner() catches bad owner parameter input", 11 | { 12 | expect_equal(sqlmlutils:::checkOwner(NULL), NULL) 13 | expect_equal(sqlmlutils:::checkOwner(''), NULL) 14 | expect_equal(sqlmlutils:::checkOwner('AirlineUserdbowner'), NULL) 15 | expect_error(sqlmlutils:::checkOwner(c('a','b'))) 16 | expect_error(sqlmlutils:::checkOwner(1)) 17 | expect_equal(sqlmlutils:::checkOwner('01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567'), NULL) 18 | expect_error(sqlmlutils:::checkOwner('012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678')) 19 | }) 20 | 21 | test_that("Package management ExtLib", { 22 | versionClass <- sqlmlutils:::sqlCheckPackageManagementVersion(connectionString = helper_getSetting("connectionStringDBO")) 23 | expect_equal(versionClass, "ExtLib") 24 | }) 25 | 26 | test_that("GetServerVersion() Returns Server Version of R Successfully",{ 27 | rversion <- sqlmlutils:::getserverVersion(connectionString = cnnstr, languageName = "R") 28 | # rversion value truncated, so R may be >= 3.5 (3.5.3) or >= 4.2 29 | expect_gte(as.double(rversion[['rversion']]), 3.5) 30 | }) 31 | -------------------------------------------------------------------------------- /R/man/executeFunctionInSQL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/executeInSQL.R 3 | \name{executeFunctionInSQL} 4 | \alias{executeFunctionInSQL} 5 | \title{Execute a function in SQL} 6 | \usage{ 7 | executeFunctionInSQL( 8 | connectionString, 9 | func, 10 | ..., 11 | inputDataQuery = "", 12 | getScript = FALSE, 13 | languageName = "R" 14 | ) 15 | } 16 | \arguments{ 17 | 
\item{connectionString}{character string. The connectionString to the database} 18 | 19 | \item{func}{closure. The function to execute} 20 | 21 | \item{...}{A named list of arguments to pass into the function} 22 | 23 | \item{inputDataQuery}{character string. A string to query the database. 24 | The result of the query will be put into a data frame into the first argument in the function} 25 | 26 | \item{getScript}{boolean. Return the tsql script that would be run on the server instead of running it} 27 | 28 | \item{languageName}{string. Use a language name other than the default R, if using an EXTERNAL LANGUAGE.} 29 | } 30 | \value{ 31 | The returned value from the function 32 | } 33 | \description{ 34 | Execute a function in SQL 35 | } 36 | \examples{ 37 | \dontrun{ 38 | connection <- connectionInfo(database = "AirlineTestDB") 39 | 40 | foo <- function(in_df, arg) 41 | { 42 | list(data = in_df, value = arg) 43 | } 44 | 45 | executeFunctionInSQL(connection, foo, arg = 12345, 46 | inputDataQuery = "SELECT top 1 * from airline5000") 47 | } 48 | 49 | } 50 | \seealso{ 51 | \code{\link{executeScriptInSQL}} to execute a script file instead of a function in SQL 52 | } 53 | -------------------------------------------------------------------------------- /R/man/executeSproc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/storedProcedure.R 3 | \name{executeSproc} 4 | \alias{executeSproc} 5 | \title{Execute a Stored Procedure} 6 | \usage{ 7 | executeSproc(connectionString, name, ..., getScript = FALSE) 8 | } 9 | \arguments{ 10 | \item{connectionString}{character string. The connectionString for the database with the stored procedure} 11 | 12 | \item{name}{character string. The name of the stored procedure in the database to execute} 13 | 14 | \item{...}{named list. Parameters to pass into the procedure. 
These MUST be named the same as the arguments to the function.} 15 | 16 | \item{getScript}{boolean. Return the tsql script that would be run on the server instead of running it} 17 | } 18 | \description{ 19 | Execute a Stored Procedure 20 | } 21 | \section{Warning}{ 22 | 23 | Even though you can create stored procedures with output parameters, you CANNOT currently execute them with output parameters 24 | } 25 | 26 | \examples{ 27 | \dontrun{ 28 | connectionString <- connectionInfo() 29 | 30 | dropSproc(connectionString, "fun") 31 | 32 | func <- function(arg1) {return(data.frame(hello = arg1))} 33 | createSprocFromFunction(connectionString, name = "fun", 34 | func = func, inputParams = list(arg1="character")) 35 | 36 | if (checkSproc(connectionString, "fun")) 37 | { 38 | print("Function 'fun' exists!") 39 | executeSproc(connectionString, "fun", arg1="WORLD") 40 | } 41 | } 42 | } 43 | \seealso{ 44 | { 45 | \code{\link{createSprocFromFunction}} 46 | 47 | \code{\link{dropSproc}} 48 | 49 | \code{\link{checkSproc}} 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /Python/samples/sample_stored_procedure.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
import sqlmlutils
import pytest


def principal_components(input_table: str, output_table: str):
    """Reduce three airline columns to two principal components and store them.

    Reads the top 200 rows of ArrDelay, CRSDepTime and DayOfWeek from
    ``input_table``, drops rows with missing values, fits a two component PCA
    and writes the transformed rows to ``output_table``, replacing that table
    if it already exists.
    """
    import sqlalchemy
    from urllib import parse
    import pandas as pd
    from sklearn.decomposition import PCA

    # Internal ODBC connection string used by process executing inside SQL Server
    odbc_settings = "Driver=SQL Server;Server=localhost;Database=AirlineTestDB;Trusted_Connection=Yes;"
    engine_url = "mssql+pyodbc:///?odbc_connect={}".format(parse.quote_plus(odbc_settings))
    engine = sqlalchemy.create_engine(engine_url)

    query = "select top 200 ArrDelay,CRSDepTime,DayOfWeek from {}".format(input_table)
    complete_rows = pd.read_sql(query, engine).dropna()

    projected = PCA(n_components=2).fit_transform(complete_rows)

    pd.DataFrame(projected).to_sql(output_table, engine, if_exists="replace")


connection = sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB")
sqlpy = sqlmlutils.SQLPythonExecutor(connection)

input_table = "airline5000"
output_table = "AirlineDemoPrincipalComponents"
sp_name = "SavePrincipalComponents"

# Drop any stored procedure left behind under the same name.
already_present = sqlpy.check_sproc(sp_name)
if already_present:
    sqlpy.drop_sproc(sp_name)

sqlpy.create_sproc_from_function(sp_name, principal_components)

# You can check the stored procedure exists in the db with this:
assert sqlpy.check_sproc(sp_name)

sqlpy.execute_sproc(sp_name, input_table=input_table, output_table=output_table)

# Clean up and confirm the procedure is gone again.
sqlpy.drop_sproc(sp_name)
assert not sqlpy.check_sproc(sp_name)
| \title{sqlmlutils: Wraps R code into executable SQL Server stored procedures} 8 | \description{ 9 | sqlmlutils is a package designed to help users interact with SQL Server and execute R or Python code 10 | from an R/Python client. It provides a series of functions for executing functions in SQL, 11 | creating and running stored procedures, and managing packages on the database. 12 | } 13 | \section{Executing in SQL}{ 14 | 15 | 16 | \itemize{ 17 | \item \code{\link{connectionInfo}}: Creates a connection string from a set of parameters 18 | 19 | \item \code{\link{executeScriptInSQL}}: Executes a script file inside SQL 20 | 21 | \item \code{\link{executeFunctionInSQL}}: Executes a user function inside SQL 22 | 23 | \item \code{\link{executeSQLQuery}}: Executes a SQL query and returns the resultant table 24 | } 25 | } 26 | 27 | \section{Stored Procedures}{ 28 | 29 | \itemize{ 30 | \item \code{\link{createSprocFromFunction}}: Creates a stored procedure from a custom R function 31 | 32 | \item \code{\link{createSprocFromScript}}: Creates a stored procedure from a custom R script file 33 | 34 | \item \code{\link{dropSproc}}: Drops a stored procedure from the database 35 | 36 | \item \code{\link{executeSproc}}: Executes a stored procedure that is already in the database 37 | 38 | \item \code{\link{checkSproc}}: Checks if a stored procedure is already in the database 39 | } 40 | } 41 | 42 | \section{Package Management}{ 43 | 44 | 45 | \itemize{ 46 | \item \code{\link{sql_install.packages}}: Installs packages on a SQL Server 47 | 48 | \item \code{\link{sql_remove.packages}}: Removes packages from a SQL Server 49 | 50 | \item \code{\link{sql_installed.packages}}: Enumerates the installed packages on a SQL Server 51 | } 52 | } 53 | 54 | \keyword{package} 55 | -------------------------------------------------------------------------------- /R/tests/testthat/test.sqlPackage.fileNameParse.unit.R: -------------------------------------------------------------------------------- 1 
| # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(sqlmlutils) 5 | library(testthat) 6 | 7 | context("Tests for sqlmlutils package management file path parsing helpers") 8 | 9 | # 10 | # A package name "should contain only (ASCII) letters, numbers and dot, have at least two 11 | # characters and start with a letter and not end in a dot. 12 | # Source: https://cran.r-project.org/doc/manuals/r-devel/R-exts.html#The-DESCRIPTION-file 13 | # Consequentially, this test ensures that the parsed package name are the characters 14 | # that appear before the first underscore (_) 15 | # 16 | test_that("getPackageNameFromFilePath outputs correct package name", { 17 | expect_equal(sqlmlutils:::getPackageNameFromFilePath(c('C:\\packages\\binaries\\data.table_1.14.6.zip')), 'data.table') 18 | expect_equal(sqlmlutils:::getPackageNameFromFilePath(c('C:\\packages\\binaries\\sqlmlutils_1.2.0.zip')), 'sqlmlutils') 19 | expect_equal(sqlmlutils:::getPackageNameFromFilePath(c('C:\\packages\\binaries\\mypackage_metadata_1.14.6.zip')), 'mypackage') 20 | expect_equal(sqlmlutils:::getPackageNameFromFilePath(c('C:\\packages\\binaries\\st_1.2.7.zip')), 'st') 21 | }) 22 | 23 | # 24 | # Tests that checking for file existance on invalid filepaths properly fails. 
25 | # 26 | test_that("areValidFilesPaths fails when files do not exist", { 27 | # Using 1 as a default value, functions under test don't use the value 28 | # for any calculations 29 | topMostPackageFlagAttribute <- 1 30 | 31 | # Generate list of sample file paths that would be provided by a user to sql_install.packages() 32 | fileList <- c('C:\\packages\\binaries\\data.table_1.14.6.zip') 33 | fileList <- append(fileList, c('C:\\packages\\binaries\\sqlmlutils_1.2.0.zip')) 34 | fileList <- append(fileList, c('C:\\packages\\binaries\\st_1.2.7.zip')) 35 | 36 | # As this is a unit test, the sample files in fileList do not actually exist 37 | expect_error(sqlmlutils:::areValidFilesPaths(pkgs = fileList)) 38 | }) 39 | -------------------------------------------------------------------------------- /R/tests/testthat/helper-Setup.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(sqlmlutils) 5 | library(methods) 6 | library(testthat) 7 | 8 | options(keep.source = TRUE) 9 | Sys.setenv(TZ='GMT') 10 | 11 | Sysname <- Sys.info()['sysname'] 12 | cat("INFO: sysname=", Sysname, "\n", sep = "") 13 | 14 | Driver <- Sys.getenv("DRIVER") 15 | if (Driver == '') 16 | { 17 | if(Sysname == "Windows") 18 | { 19 | Driver <- "SQL Server" 20 | } 21 | else 22 | { 23 | Driver <- "ODBC Driver 17 for SQL Server" 24 | } 25 | } 26 | 27 | cat("INFO: Driver=", Driver, "\n", sep = "") 28 | 29 | Server <- Sys.getenv("SERVER") 30 | if (Server == '') Server <- "." 
31 | 32 | Database <- Sys.getenv("DATABASE") 33 | if (Database == '') Database <- "AirlineTestDB" 34 | 35 | Uid <- Sys.getenv("USER") 36 | Pwd <- Sys.getenv("PASSWORD") 37 | PwdAirlineUserdbowner <- Sys.getenv("PASSWORD_AIRLINE_USER_DBOWNER") 38 | PwdAirlineUser <- Sys.getenv("PASSWORD_AIRLINE_USER") 39 | 40 | if(Uid == '') Uid = NULL 41 | if(Pwd == '') Pwd = NULL 42 | if(PwdAirlineUserdbowner == '') PwdAirlineUserdbowner = NULL 43 | if(PwdAirlineUser == '') PwdAirlineUser = NULL 44 | 45 | sqlcmd_path <- Sys.getenv("SQLCMD") 46 | if (sqlcmd_path == '') sqlcmd_path <- "sqlcmd" 47 | 48 | cnnstr <- connectionInfo(driver=Driver, server=Server, database=Database, uid=Uid, pwd=Pwd) 49 | 50 | testthatDir <- getwd() 51 | R_Root <- file.path(testthatDir, "../..") 52 | scriptDirectory <- file.path(testthatDir, "scripts") 53 | 54 | options(repos = c(CRAN="https://cloud.r-project.org/")) 55 | cat("INFO: repos = ", getOption("repos"), sep="\n") 56 | 57 | # Compute context specifications 58 | # 59 | TestArgs <- list( 60 | gitRoot = R_Root, 61 | testDirectory = testthatDir, 62 | scriptDirectory = scriptDirectory, 63 | driver=Driver, 64 | server=Server, 65 | database=Database, 66 | uid=Uid, 67 | pwd=Pwd, 68 | pwdAirlineUserdbowner = PwdAirlineUserdbowner, 69 | pwdAirlineUser = PwdAirlineUser, 70 | connectionString = cnnstr, 71 | sqlcmd = sqlcmd_path 72 | ) 73 | 74 | options(TestArgs = TestArgs) 75 | rm(TestArgs) 76 | -------------------------------------------------------------------------------- /R/man/sql_installed.packages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sqlPackage.R 3 | \name{sql_installed.packages} 4 | \alias{sql_installed.packages} 5 | \title{sql_installed.packages} 6 | \usage{ 7 | sql_installed.packages( 8 | connectionString, 9 | priority = NULL, 10 | noCache = FALSE, 11 | fields = "Package", 12 | subarch = NULL, 13 | scope = "private", 
14 | owner = "", 15 | scriptFile = NULL, 16 | languageName = "R" 17 | ) 18 | } 19 | \arguments{ 20 | \item{connectionString}{ODBC connection string to Microsoft SQL Server database.} 21 | 22 | \item{priority}{character vector or NULL (default). If non-null, used to select packages; "high" is equivalent to c("base", "recommended"). To select all packages without an assigned priority use priority = "NA".} 23 | 24 | \item{noCache}{logical. If TRUE, do not use cached information, nor cache it.} 25 | 26 | \item{fields}{a character vector giving the fields to extract from each package's DESCRIPTION file, or NULL. If NULL, the following fields are used: "Package", "LibPath", "Version", "Priority", "Depends", "Imports", "LinkingTo", "Suggests", "Enhances", "License", "License_is_FOSS", "License_restricts_use", "OS_type", "MD5sum", "NeedsCompilation", and "Built". Unavailable fields result in NA values.} 27 | 28 | \item{subarch}{character string or NULL. If non-null and non-empty, used to select packages which are installed for that sub-architecture} 29 | 30 | \item{scope}{character string which can be "private" or "public".} 31 | 32 | \item{owner}{character string of a user whose private packages shall be listed (available to dbo or db_owner users only)} 33 | 34 | \item{scriptFile}{character string - a file where to record the tsql that is run by the function.} 35 | 36 | \item{languageName}{string.
Use a language name other than the default R, if using an EXTERNAL LANGUAGE.} 37 | } 38 | \value{ 39 | matrix with enumerated packages 40 | } 41 | \description{ 42 | Enumerates the currently installed R packages on a SQL Server for the current database 43 | } 44 | \seealso{ 45 | { 46 | \code{\link{sql_install.packages}} to install packages 47 | 48 | \code{\link{sql_remove.packages}} to remove packages 49 | 50 | \code{\link{installed.packages}} for the base version of this function 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /R/tests/testthat/test.sqlPackage.basic.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(sqlmlutils) 5 | library(testthat) 6 | 7 | context("Tests for sqlmlutils package management") 8 | 9 | test_that( "successfull install and remove of package with special char in name that requires [] in t-sql", 10 | { 11 | # 12 | # Set scope to public for trusted connection on Windows 13 | # 14 | scope <- if(!helper_isServerLinux()) "public" else "private" 15 | 16 | packageName <- c("abc.data") 17 | connectionStringDBO <- helper_getSetting("connectionStringDBO") 18 | 19 | tryCatch({ 20 | # 21 | # Remove old packages if any and verify they aren't there 22 | # 23 | if (helper_remote.require( connectionStringDBO, packageName) == TRUE) 24 | { 25 | cat("\nINFO: removing package...\n") 26 | sql_remove.packages(connectionStringDBO, packageName, verbose = TRUE, scope = scope) 27 | } 28 | 29 | helper_checkPackageStatusRequire( connectionStringDBO, packageName, FALSE) 30 | 31 | # 32 | # Install single package (package has no dependencies) 33 | # 34 | output <- try(capture.output(sql_install.packages(connectionStringDBO, packageName, verbose = TRUE, scope = scope))) 35 | print(output) 36 | expect_true(!inherits(output, "try-error")) 37 | expect_equal(1, 
class ConnectionInfo:
    """Information needed to connect to SQL Server.

    The values given to the constructor are stored unchanged, exposed through
    read-only properties, and combined into an ODBC connection string by
    :attr:`connection_string`.
    """

    def __init__(self, driver: str = "SQL Server", server: str = "localhost", port: str = "", database: str = "master",
                 uid: str = "", pwd: str = ""):
        """
        :param driver: Driver to use to connect to SQL Server.
        :param server: SQL Server hostname or a specific instance to connect to.
        :param port: SQL Server port number.
        :param database: Database to connect to.
        :param uid: uid to connect with. If not specified, utilizes trusted authentication.
        :param pwd: pwd to connect with. If uid is not specified, pwd is ignored; uses trusted auth instead

        >>> from sqlmlutils import ConnectionInfo
        >>> connection = ConnectionInfo(server="ServerName", database="DatabaseName", uid="Uid", pwd="Pwd")
        """
        self._driver = driver
        self._server = server
        self._port = port
        self._database = database
        self._uid = uid
        self._pwd = pwd

    @property
    def driver(self):
        """Driver name passed to the constructor."""
        return self._driver

    @property
    def server(self):
        """Server hostname or instance passed to the constructor."""
        return self._server

    @property
    def port(self):
        """Port passed to the constructor ("" when none was given)."""
        return self._port

    @property
    def database(self):
        """Database name passed to the constructor."""
        return self._database

    @property
    def uid(self):
        """User id passed to the constructor ("" selects trusted authentication)."""
        return self._uid

    @property
    def pwd(self):
        """Password passed to the constructor (unused when uid is "")."""
        return self._pwd

    @property
    def connection_string(self):
        """Build the ODBC connection string for these settings.

        The result has the form ``Driver=...;Server=...;Database=...;<auth>;``.
        ``<auth>`` is ``Trusted_Connection=Yes`` when no uid was given and
        ``uid=...;pwd={...}`` otherwise. When a port was given it is appended
        to the server as ``server,port``.

        :return: the connection string.
        """
        if self._port == "":
            server = self._server
        else:
            server = "{server},{port}".format(server=self._server, port=self._port)

        if self._uid == "":
            auth = "Trusted_Connection=Yes"
        else:
            # The password is wrapped in braces so that characters such as ';'
            # are taken literally. Inside a braced ODBC value a closing brace
            # ends the value unless it is doubled, so escape it here.
            escaped_pwd = self._pwd.replace("}", "}}")
            auth = "uid={uid};pwd={{{pwd}}}".format(uid=self._uid, pwd=escaped_pwd)

        return "Driver={driver};Server={server};Database={database};{auth};".format(
            driver=self._driver,
            server=server,
            database=self._database,
            auth=auth
        )
string to SQL Server database.} 21 | 22 | \item{pkgs}{character vector of names of the packages to be removed.} 23 | 24 | \item{dependencies}{logical. If TRUE, does dependency resolution of the packages being removed and removes the dependent packages also if the dependent packages aren't referenced by other packages outside the dependency closure.} 25 | 26 | \item{checkReferences}{logical. If TRUE, verifies there are no references to the dependent packages by other packages outside the dependency closure. Use FALSE to force removal of packages even when other packages depend on it.} 27 | 28 | \item{verbose}{logical. If TRUE, more detailed information is given during removal of packages.} 29 | 30 | \item{scope}{character string. Should be either "public" or "private". "public" removes the packages from a per-database public location on SQL Server which in turn could have been used (referred) by multiple different users. "private" removes the packages from a per-database, per-user private location on SQL Server which is only accessible to the single user.} 31 | 32 | \item{owner}{character string. Should be either empty '' or a valid SQL database user account name. Only 'dbo' or users in 'db_owner' role for a database can specify this value to remove packages on behalf of other users. A user who is member of the 'db_owner' group can set owner='dbo' to remove packages from the "public" folder.} 33 | 34 | \item{scriptFile}{character string - a file where to record the tsql that is run by the function.} 35 | 36 | \item{languageName}{string. Use a language name other than the default R, if using an EXTERNAL LANGUAGE.} 37 | } 38 | \value{ 39 | invisible(NULL) 40 | } 41 | \description{ 42 | Removes R packages from a SQL Server database. 
class DependencyResolver:
    """Work out which packages still have to be installed for a target package.

    ``server_packages`` is iterated as a sequence of entries whose element 0
    is a package name and element 1 is the installed version string.
    Package names are always compared case-insensitively, with ``-`` and
    ``_`` treated as the same character.
    """

    def __init__(self, server_packages, target_package):
        """
        :param server_packages: (name, version) entries for the packages on the server.
        :param target_package: name of the package the caller wants installed.
        """
        self._server_packages = server_packages
        self._target_package = target_package

    def requirement_met(self, upgrade: bool, version: str = None) -> bool:
        """Tell whether the target package is already satisfied on the server.

        :param upgrade: when False, the package merely has to exist on the server.
        :param version: version the server copy must be at least equal to
            when ``upgrade`` is True; None means "not met" in that case.
        :return: True if nothing has to be installed for the target package.
        """
        if not self._package_exists_on_server(self._target_package):
            return False
        if not upgrade:
            return True

        server_version = self.get_target_server_version()
        return (version is not None and server_version != "" and
                LooseVersion(server_version) >= LooseVersion(version))

    def get_target_server_version(self):
        """Return the server's version of the target package, or "" if absent.

        Uses the same name normalisation as the existence check, so a package
        reported as existing also yields its version.
        """
        target = self._normalize_name(self._target_package)
        for package in self._server_packages:
            if self._normalize_name(package[0]) == target:
                return package[1]
        return ""

    def get_required_installs(self, target_requirements):
        """Return the cleaned names of the requirements that must be installed.

        A requirement is listed when it is missing from the server, when the
        installed version does not satisfy its specifier, or when it is the
        target package itself (which is always listed).

        :param target_requirements: requirement objects exposing ``name`` and ``line``.
        :return: list of names with ``-`` replaced by ``_``.
        """
        target = self._normalize_name(self._target_package)
        required_packages = []
        for requirement in target_requirements:
            reqmet = self._package_exists_on_server(requirement.name) and \
                self._check_if_installed_package_meets_spec(self._server_packages, requirement)

            if not reqmet or self._normalize_name(requirement.name) == target:
                required_packages.append(self.clean_requirement_name(requirement.name))
        return required_packages

    def _package_exists_on_server(self, pkgname):
        """Return True if a server package has the same normalised name as ``pkgname``."""
        wanted = self._normalize_name(pkgname)
        return any(self._normalize_name(serverpkg[0]) == wanted
                   for serverpkg in self._server_packages)

    @staticmethod
    def clean_requirement_name(reqname: str):
        """Return ``reqname`` with every ``-`` replaced by ``_``."""
        return reqname.replace("-", "_")

    @staticmethod
    def _normalize_name(name: str) -> str:
        """Return the canonical form used for every package-name comparison."""
        return DependencyResolver.clean_requirement_name(name.lower())

    @staticmethod
    def _check_if_installed_package_meets_spec(package_tuples, requirement):
        """Return True if the installed copy of ``requirement`` satisfies its specifier.

        :param package_tuples: (name, version) entries for the installed packages.
        :param requirement: object exposing ``name`` and ``line``; ``line`` is
            parsed with ``pkg_resources.Requirement`` to obtain the specifier.
        :return: False when the package is not installed at all.
        """
        wanted = DependencyResolver._normalize_name(requirement.name)
        installed_package_name_and_version = [
            package for package in package_tuples
            if DependencyResolver._normalize_name(package[0]) == wanted]

        if not installed_package_name_and_version:
            return False

        # Only the first matching entry is consulted.
        installed_version = installed_package_name_and_version[0][1]
        return Requirement.parse(requirement.line).specifier.contains(installed_version)
8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 
36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /R/man/sql_install.packages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sqlPackage.R 3 | \name{sql_install.packages} 4 | \alias{sql_install.packages} 5 | \title{sql_install.packages} 6 | \usage{ 7 | sql_install.packages( 8 | connectionString, 9 | pkgs, 10 | skipMissing = FALSE, 11 | repos, 12 | verbose = getOption("verbose"), 13 | scope = "private", 14 | owner = "", 15 | scriptFile = NULL, 16 | languageName = "R" 17 | ) 18 | } 19 | \arguments{ 20 | \item{connectionString}{ODBC connection string to Microsoft SQL Server database.} 21 | 22 | \item{pkgs}{character vector of the names of packages whose current versions should be downloaded from the repositories. If repos = NULL, a character vector of file paths of .zip files containing binary builds of packages. (http:// and file:// URLs are also accepted and the files will be downloaded and installed from local copies).} 23 | 24 | \item{skipMissing}{logical. If TRUE, skips missing dependent packages for which otherwise an error is generated.} 25 | 26 | \item{repos}{character vector, the base URL(s) of the repositories to use. Can be NULL to install from local files or directories.} 27 | 28 | \item{verbose}{logical. If TRUE, more detailed information is given during installation of packages.} 29 | 30 | \item{scope}{character string. Should be either "public" or "private". "public" installs the packages on per database public location on SQL server which in turn can be used (referred) by multiple different users.
"private" installs the packages on per database, per user private location on SQL server which is only accessible to the single user.} 31 | 32 | \item{owner}{character string. Should be either empty '' or a valid SQL database user account name. Only 'dbo' or users in 'db_owner' role for a database can specify this value to install packages on behalf of other users. A user who is member of the 'db_owner' group can set owner='dbo' to install on the "public" folder.} 33 | 34 | \item{scriptFile}{character string - a file where to record the tsql that is run by the function.} 35 | 36 | \item{languageName}{string. Use a language name other than the default R, if using an EXTERNAL LANGUAGE.} 37 | } 38 | \value{ 39 | invisible(NULL) 40 | } 41 | \description{ 42 | Installs R packages on a SQL Server database. Packages are downloaded on the client and then copied and installed to SQL Server into "public" and "private" folders. Packages in the "public" folders can be loaded by all database users running R script in SQL. Packages in the "private" folder can be loaded only by a single user. 'dbo' users always install into the "public" folder. Users who are members of the 'db_owner' role can install to both "public" and "private" folders. All other users can only install packages to their "private" folder. 
43 | } 44 | \seealso{ 45 | { 46 | \code{\link{sql_remove.packages}} to remove packages 47 | 48 | \code{\link{sql_installed.packages}} to enumerate the installed packages 49 | 50 | \code{\link{install.packages}} for the base version of this function 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /R/man/createSprocFromFunction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/storedProcedure.R 3 | \name{createSprocFromFunction} 4 | \alias{createSprocFromFunction} 5 | \alias{createSprocFromScript} 6 | \title{Create a Stored Procedure} 7 | \usage{ 8 | createSprocFromFunction( 9 | connectionString, 10 | name, 11 | func, 12 | inputParams = NULL, 13 | outputParams = NULL, 14 | getScript = FALSE, 15 | languageName = "R" 16 | ) 17 | 18 | createSprocFromScript( 19 | connectionString, 20 | name, 21 | script, 22 | inputParams = NULL, 23 | outputParams = NULL, 24 | getScript = FALSE, 25 | languageName = "R" 26 | ) 27 | } 28 | \arguments{ 29 | \item{connectionString}{character string. The connectionString to the database} 30 | 31 | \item{name}{character string. The name of the stored procedure} 32 | 33 | \item{func}{closure. The function to wrap in the stored procedure} 34 | 35 | \item{inputParams}{named list. The types of the inputs, 36 | where the names are the arguments and the values are the types} 37 | 38 | \item{outputParams}{named list. The types of the outputs, 39 | where the names are the arguments and the values are the types} 40 | 41 | \item{getScript}{boolean. Return the tsql script that would be run on the server instead of running it} 42 | 43 | \item{languageName}{string. Use a language name other than the default R, if using an EXTERNAL LANGUAGE.} 44 | 45 | \item{script}{character string. 
The path to the script to wrap in the stored procedure} 46 | } 47 | \value{ 48 | Invisibly returns the script used to create the stored procedure 49 | } 50 | \description{ 51 | This function creates a stored procedure from a function 52 | on the database and return the object. 53 | } 54 | \section{Functions}{ 55 | \itemize{ 56 | \item \code{createSprocFromFunction}: Create stored procedure from function 57 | 58 | \item \code{createSprocFromScript}: Create stored procedure from script file, returns output of final line 59 | }} 60 | 61 | \section{Warning}{ 62 | 63 | You can add output parameters to the stored procedure 64 | but you will not be able to execute the procedure from R afterwards. 65 | Any stored procedure with output params must be executed directly in SQL. 66 | } 67 | 68 | \examples{ 69 | \dontrun{ 70 | connectionString <- connectionInfo() 71 | 72 | ### Using a function 73 | dropSproc(connectionString, "fun") 74 | 75 | func <- function(arg1) {return(data.frame(hello = arg1))} 76 | createSprocFromFunction(connectionString, name = "fun", 77 | func = func, inputParams = list(arg1="character")) 78 | 79 | if (checkSproc(connectionString, "fun")) 80 | { 81 | print("Function 'fun' exists!") 82 | executeSproc(connectionString, "fun", arg1="WORLD") 83 | } 84 | 85 | ### Using a script 86 | createSprocFromScript(connectionString, name = "funScript", 87 | script = "path/to/script", inputParams = list(arg1="character")) 88 | 89 | } 90 | 91 | 92 | 93 | } 94 | \seealso{ 95 | { 96 | \code{\link{dropSproc}} 97 | 98 | \code{\link{executeSproc}} 99 | 100 | \code{\link{checkSproc}} 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/pipdownloader.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
class PipDownloader:
    """Download a package, optionally with its dependencies, by running pip in a child process.

    The constructor asks the server to describe itself (by executing
    ``servermethods.get_server_info`` in SQL) and stores the result in this
    module's globals, where the ``_patch_get_*`` helpers read it.
    """

    def __init__(self, connection: ConnectionInfo, downloaddir: str, targetpackage: str, language_name: str):
        """
        :param connection: connection used to query the server.
        :param downloaddir: directory the packages are downloaded into.
        :param targetpackage: package specification handed to ``pip download``.
        :param language_name: language name passed to SQLPythonExecutor.
        """
        self._connection = connection
        self._downloaddir = downloaddir
        self._targetpackage = targetpackage
        self._language_name = language_name
        server_info = SQLPythonExecutor(connection, self._language_name).execute_function_in_sql(servermethods.get_server_info)
        # NOTE: this is module-level state shared by every PipDownloader; the
        # _patch_get_* helpers look their values up in these globals.
        globals().update(server_info)

    def download(self):
        """Download the target package with its dependencies.

        :return: ``(requirements, file_paths)`` as produced by ``_download``.
        """
        return self._download(True)

    def download_single(self) -> str:
        """Download only the target package and return the path of the first file found."""
        _, pkgsdownloaded = self._download(False)
        return pkgsdownloaded[0]

    def _download(self, withdependencies):
        """Run ``pip download`` and collect what ended up in the download directory.

        :param withdependencies: when False, ``--no-dependencies`` is passed to pip.
        :return: ``(requirements parsed from pip's output, paths of the files in the download directory)``.
        :raises RuntimeError: if the download directory contains no file afterwards.
        """
        # This command directs pip to download the target package, as well as all of its dependencies into
        # the download directory.
        commands = ["download", self._targetpackage, "--destination-dir", self._downloaddir, "--no-cache-dir"]
        if not withdependencies:
            commands.append("--no-dependencies")

        output, error = self._run_in_new_process(commands)

        pkgreqs = self._get_reqs_from_output(output)

        candidates = (os.path.join(self._downloaddir, f) for f in os.listdir(self._downloaddir))
        packagesdownloaded = [path for path in candidates if os.path.isfile(path)]

        if not packagesdownloaded:
            raise RuntimeError("Failed to download any packages, pip returned error: " + error)

        return pkgreqs, packagesdownloaded

    def _run_in_new_process(self, commands):
        """Run download_script.py in a separate interpreter and capture its output.

        The server's implementation details and the comma-joined pip arguments
        are passed on the command line.

        :param commands: pip arguments, e.g. ``["download", "pkg", ...]``.
        :return: ``(stdout, stderr)`` of the child process, decoded to str.
        """
        # We get the package requirements based on the print output of pip, which is stable across version 8-10.
        # TODO: get requirements in a more robust way (either through using pip internal code or rolling our own)
        download_script = os.path.join((os.path.dirname(os.path.realpath(__file__))), "download_script.py")
        exe_path = sys.executable if sys.executable is not None else "python"
        args = [exe_path, download_script,
                str(_patch_get_impl_version_info()), str(_patch_get_abbr_impl()),
                str(_patch_get_abi_tag()), str(_patch_get_platform()),
                ",".join(str(x) for x in commands)]

        with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
            # communicate() drains both pipes concurrently. Reading stdout to
            # EOF before stderr can deadlock once the child fills the stderr
            # pipe buffer.
            output, error = proc.communicate()

        return output.decode(), error.decode()

    @staticmethod
    def _get_reqs_from_output(pipoutput: str):
        """Parse the requirements named on pip's "Collecting ..." output lines.

        :param pipoutput: text printed by pip.
        :return: list of the objects produced by ``requirements.parse``.
        """
        # TODO: get requirements in a more robust way (either through using pip internal code or rolling our own)
        def clean(line):
            # Drop the "Collecting " prefix and any parenthesised remark.
            return re.sub(r'\(.*\)', "", line.replace("Collecting ", "").strip())

        reqstr = "\n".join(clean(line) for line in pipoutput.splitlines() if "Collecting" in line)
        return list(requirements.parse(reqstr))


def _patch_get_impl_version_info():
    """Return the "impl_version_info" value stored in the module globals by PipDownloader."""
    return globals()["impl_version_info"]


def _patch_get_abbr_impl():
    """Return the "abbr_impl" value stored in the module globals by PipDownloader."""
    return globals()["abbr_impl"]


def _patch_get_abi_tag():
    """Return the "abi_tag" value stored in the module globals by PipDownloader."""
    return globals()["abi_tag"]


def _patch_get_platform():
    """Return the "platform" value stored in the module globals by PipDownloader."""
    return globals()["platform"]
SQL Database) and execute R or Python code in SQL from an R/Python client. 6 | Currently, only the R version of sqlmlutils is supported in Azure SQL Database. Python support will be added later. 7 | 8 | ### Check out the README in each language folder for language-specific details and code examples! 9 | 10 | # Installation 11 | 12 | To install sqlmlutils, follow the instructions below for Python and R, respectively. 13 | 14 | Python: 15 | To install from PyPI: 16 | Run 17 | ```bash 18 | pip install sqlmlutils 19 | ``` 20 | To install from file, download the latest release from https://github.com/microsoft/sqlmlutils/releases: 21 | ```bash 22 | pip install sqlmlutils-1.1.0.zip 23 | ``` 24 | 25 | R: 26 | 27 | Download the latest release from https://github.com/microsoft/sqlmlutils/releases. 28 | 29 | Windows: 30 | 31 | To obtain the version of R your server is currently using, please use this query: 32 | ```tsql 33 | EXEC sp_execute_external_script 34 | @language = N'R', 35 | @script = N' 36 | v = R.version 37 | OutputDataSet = data.frame(rversion=paste0(v$major, ".", v$minor))', 38 | @input_data_1 = N'select 1' 39 | WITH RESULT SETS ((rversion varchar(max))); 40 | ``` 41 | Get the version of R which the server is using and install it locally. Then, run the following commands with the same version of R. 
42 | 43 | From command prompt, run 44 | ```bash 45 | R.exe -e "install.packages('odbc', type='binary')" 46 | R.exe CMD INSTALL sqlmlutils_1.0.0.zip 47 | ``` 48 | OR 49 | To build a new package file and install, run 50 | ```bash 51 | .\buildandinstall.cmd 52 | ``` 53 | 54 | Linux 55 | ```bash 56 | R.exe -e "install.packages('odbc')" 57 | R.exe CMD INSTALL sqlmlutils_1.0.0.tar.gz 58 | ``` 59 | 60 | # Details 61 | 62 | sqlmlutils contains 3 main parts: 63 | - Execution of Python/R in SQL databases using sp_execute_external_script 64 | - Creation and execution of stored procedures created from scripts and functions 65 | - Install and manage packages in SQL databases 66 | 67 | For more specifics and examples of how to use each language's API, look at the README in the respective folder. 68 | 69 | ## Execute in SQL 70 | 71 | Execute in SQL provides a convenient way for the user to execute arbitrary Python/R code inside a SQL database using an sp_execute_external_script. The user does not have to know any t-sql to use this function. Function arguments are serialized into binary and passed into the t-sql script that is generated. Warnings and printed output will be printed at the end of execution, and any results returned by the function will be passed back to the client. 72 | 73 | ## Stored Procedures (Sprocs) 74 | 75 | The goal of this utility is to allow users to create and execute stored procedures on their database without needing to know the exact syntax of creating one. Functions and scripts are wrapped into a stored procedure and registered into a database, then can be executed from the Python/R client. 76 | 77 | ## Package Management 78 | 79 | ##### R and Python package management with sqlmlutils is supported in SQL Server 2019 CTP 2.4 and later. 80 | 81 | With package management users can install packages to a remote SQL database from a client machine. 
The packages are downloaded on the client and then sent over to SQL databases where they will be installed into library folders. The folders are per-database so packages will always be installed and made available for a specific database. The package management APIs provide PUBLIC and PRIVATE folders. Packages in the PUBLIC folder are accessible to all database users. Packages in the PRIVATE folder are only accessible by the user who installed the package. 82 | -------------------------------------------------------------------------------- /R/tests/testthat/test.sqlPackage.createExternalLibrary.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(sqlmlutils) 5 | library(testthat) 6 | 7 | context("Tests for sqlmlutils package management create external library") 8 | 9 | test_that("Package APIs interop with Create External Library", 10 | { 11 | cat("\nINFO: test if package management interops properly with packages installed directly with CREATE EXTERNAL LIBRARY\n 12 | Note:\n 13 | packages installed with CREATE EXTERNAL LIBRARY won't have top-level attribute set in extended properties\n 14 | By default we will consider them top-level packages\n") 15 | 16 | connectionStringAirlineUserdbowner <- helper_getSetting("connectionStringAirlineUserdbowner") 17 | scope <- "private" 18 | packageName <- c("glue") 19 | 20 | tryCatch({ 21 | cat("\nINFO: checking remote lib paths...\n") 22 | helper_checkSqlLibPaths(connectionStringAirlineUserdbowner, 1) 23 | 24 | # 25 | # remove old packages if any and verify they aren't there 26 | # 27 | cat("\nINFO: removing packages...\n") 28 | if (helper_remote.require( connectionStringAirlineUserdbowner, packageName) == TRUE) 29 | { 30 | sql_remove.packages( connectionStringAirlineUserdbowner, packageName, verbose = TRUE, scope = scope) 31 | } 32 | 33 | helper_checkPackageStatusRequire(
connectionStringAirlineUserdbowner, packageName, FALSE) 34 | 35 | # 36 | # install the package with its dependencies and check if its present 37 | # 38 | repoDir <- file.path(tempdir(), "repo") 39 | on.exit({ 40 | if ( dir.exists(repoDir)){ 41 | unlink( repoDir, recursive = TRUE , force = TRUE) 42 | } 43 | }) 44 | 45 | dir.create( repoDir, recursive = TRUE) 46 | download.packages( c("glue"), destdir = repoDir, type = "win.binary" ) 47 | pkgPath <- list.files(repoDir, pattern = "glue.+zip", full.names = TRUE, ignore.case = TRUE) 48 | cat(sprintf("\nTEST: install package using CREATE EXTERNAL LIBRARY: pkg=%s...\n", pkgPath)) 49 | 50 | fileConnection = file(pkgPath, 'rb') 51 | pkgBin = readBin(con = fileConnection, what = raw(), n = file.size(pkgPath)) 52 | close(fileConnection) 53 | pkgContent = paste0("0x", paste0(pkgBin, collapse = "") ); 54 | 55 | output <- try(capture.output( 56 | helper_CreateExternalLibrary(connectionString = connectionStringAirlineUserdbowner, packageName = packageName, content = pkgContent) 57 | )) 58 | 59 | expect_true(!inherits(output, "try-error")) 60 | 61 | output <- try(capture.output( 62 | helper_callDummySPEES( connectionString = connectionStringAirlineUserdbowner) 63 | )) 64 | 65 | expect_true(!inherits(output, "try-error")) 66 | 67 | 68 | helper_checkPackageStatusFind( connectionStringAirlineUserdbowner, packageName, TRUE) 69 | 70 | # Enumerate packages and check that package is listed as top-level 71 | # 72 | cat("\nTEST: enumerate packages and check that package is listed as top-level...\n") 73 | installedPkgs <- helper_tryCatchValue( sql_installed.packages(connectionString = connectionStringAirlineUserdbowner, fields=c("Package", "Attributes", "Scope"))) 74 | 75 | expect_true(!inherits(installedPkgs$value, "try-error")) 76 | expect_equal(1, as.integer(installedPkgs$value['glue','Attributes']), msg=sprintf(" (expected package listed as top-level: pkg=%s)", packageName)) 77 | 78 | # Remove package 79 | # 80 | cat("\nTEST: remove 
package previously installed with CREATE EXTERNAL LIBRARY...\n") 81 | output <- try(capture.output(sql_remove.packages( connectionStringAirlineUserdbowner, packageName, verbose = TRUE, scope = scope))) 82 | expect_true(!inherits(output, "try-error")) 83 | expect_equal(1, sum(grepl("Successfully removed packages from SQL server", output))) 84 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, packageName, FALSE) 85 | }, finally={ 86 | helper_cleanAllExternalLibraries(connectionStringAirlineUserdbowner) 87 | }) 88 | }) 89 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | # 6 | # See https://github.com/r-lib/actions/tree/master/examples#readme for 7 | # additional example workflows available for the R community. 8 | 9 | name: BuildAndTest 10 | 11 | on: 12 | push: 13 | branches: [ master ] 14 | pull_request: 15 | branches: [ master ] 16 | workflow_dispatch: 17 | 18 | env: 19 | USER: ${{ secrets.USER }} 20 | PASSWORD: ${{ secrets.PASSWORD }} 21 | PASSWORD_AIRLINE_USER: ${{ secrets.PASSWORD_AIRLINE_USER }} 22 | PASSWORD_AIRLINE_USER_DBOWNER: ${{ secrets.PASSWORD_AIRLINE_USER_DBOWNER }} 23 | DRIVER: "{ODBC Driver 17 for SQL Server}" 24 | 25 | jobs: 26 | R: 27 | runs-on: [self-hosted, 1ES.Pool=sqlmlutils_GH_RunnerPool] 28 | strategy: 29 | matrix: 30 | include: 31 | - r-version: "3.5.2" 32 | sql-platform: "box" 33 | - r-version: "4.2.0" 34 | sql-platform: "box" 35 | 36 | env: 37 | # Define CI to skip some test case. 
38 | CI: True 39 | DATABASE: ${{ secrets.DATABASE }} 40 | 41 | defaults: 42 | run: 43 | shell: cmd 44 | 45 | steps: 46 | - name: Set SQL Server 2019 Env_var for R 3.5.2 47 | if: matrix.r-version == '3.5.2' && matrix.sql-platform == 'box' 48 | run: echo SERVER=${{ secrets.SQL19SERVER }}>> %GITHUB_ENV% 49 | shell: cmd 50 | 51 | - name: Set SQL Server 2022 Env_var for R 4.2.0 52 | if: matrix.r-version == '4.2.0' && matrix.sql-platform == 'box' 53 | run: echo SERVER=${{ secrets.SQL22SERVER }}>> %GITHUB_ENV% 54 | shell: cmd 55 | 56 | - name: Check Connectivity to SQL Database 57 | run: | 58 | sqlcmd -S tcp:%SERVER%,1433 -U %USER% -P %PASSWORD% -d %DATABASE% -l 5 -Q "SELECT @@VERSION" 59 | shell: cmd 60 | 61 | - name: Checkout Branch 62 | uses: actions/checkout@v2 63 | 64 | - name: Set up R ${{ matrix.r-version }} Runtime 65 | uses: r-lib/actions/setup-r@v2 66 | with: 67 | r-version: ${{ matrix.r-version }} 68 | 69 | - name: Install R Package Dependencies 70 | uses: r-lib/actions/setup-r-dependencies@v2 71 | with: 72 | cache-version: 2 73 | working-directory: ./R 74 | extra-packages: 75 | #Retrieves most recent odbc pkg from cran to avoid errors seen in older versions. 
76 | #Updated odbc pkg is still compatible with R >= 3.2.0 77 | cran::odbc 78 | cran::xml2 79 | rcmdcheck 80 | 81 | - uses: r-lib/actions/check-r-package@v2 82 | with: 83 | working-directory: ./R 84 | 85 | Python: 86 | runs-on: [self-hosted, 1ES.Pool=sqlmlutils_GH_RunnerPool] 87 | strategy: 88 | fail-fast: true 89 | matrix: 90 | include: 91 | - python-version: "3.7.1" 92 | sql-platform: "box" 93 | - python-version: "3.10.5" 94 | sql-platform: "box" 95 | 96 | env: 97 | CI: True 98 | DATABASE: ${{ secrets.DATABASE_PYTHON }} 99 | 100 | steps: 101 | - name: Set SQL Server 2019 Env_var for Python 3.7.1 102 | if: matrix.python-version == '3.7.1' && matrix.sql-platform == 'box' 103 | run: echo SERVER=${{ secrets.SQL19SERVER }}>> %GITHUB_ENV% 104 | shell: cmd 105 | 106 | - name: Set SQL Server 2022 Env_var for Python 3.10.5 107 | if: matrix.python-version == '3.10.5' && matrix.sql-platform == 'box' 108 | run: echo SERVER=${{ secrets.SQL22SERVER }}>> %GITHUB_ENV% 109 | shell: cmd 110 | 111 | - name: Check Connectivity SQL Database 112 | run: | 113 | sqlcmd -S tcp:%SERVER%,1433 -U %USER% -P %PASSWORD% -d %DATABASE% -l 5 -Q "SELECT @@VERSION" 114 | shell: cmd 115 | 116 | - name: Checkout Branch 117 | uses: actions/checkout@v2 118 | 119 | - name: Set up Python ${{ matrix.python-version }} 120 | uses: actions/setup-python@v2 121 | with: 122 | python-version: ${{ matrix.python-version }} 123 | 124 | - name: Install dependencies 125 | working-directory: ./Python 126 | run: | 127 | python -m pip install --upgrade pip 128 | python -m pip install flake8 pytest 129 | pip install -r requirements.txt 130 | 131 | - name: Build Python Package 132 | working-directory: ./Python 133 | run: ./buildandinstall.cmd 134 | 135 | - name: Run pytest 136 | working-directory: ./Python/tests 137 | run: | 138 | pytest 139 | -------------------------------------------------------------------------------- /R/tests/testthat/test.sqlPackage.scope.R: 
-------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | library(sqlmlutils) 5 | library(testthat) 6 | 7 | context("Tests for sqlmlutils package management scope") 8 | 9 | test_that("dbo cannot install package into private scope", 10 | { 11 | skip_if(helper_isServerLinux(), "Linux tests do not have support for Trusted user." ) 12 | 13 | connectionStringDBO <- helper_getSetting("connectionStringDBO") 14 | packageName <- c("xtable") 15 | 16 | tryCatch({ 17 | output <- try(capture.output(sql_install.packages(connectionString = connectionStringDBO, packageName, verbose = TRUE, scope="private"))) 18 | expect_true(inherits(output, "try-error")) 19 | expect_equal(1, sum(grepl("Permission denied for installing packages on SQL server for current user", output))) 20 | helper_checkPackageStatusRequire( connectionString = connectionStringDBO, packageName, FALSE) 21 | }, finally={ 22 | helper_cleanAllExternalLibraries(connectionStringDBO) 23 | }) 24 | }) 25 | 26 | test_that( "package install and remove, PUBLIC scope", 27 | { 28 | skip_if(helper_isServerLinux(), "Linux tests do not have support for Trusted user." 
) 29 | 30 | connectionStringDBO <- helper_getSetting("connectionStringDBO") 31 | 32 | tryCatch({ 33 | packageName <- c("A3") 34 | 35 | owner <- "" 36 | cat("\nTEST: connection string='",connectionStringDBO,"'\n", sep="") 37 | 38 | cat("\nTEST: owner is set to: owner='",owner,"'\n", sep="") 39 | 40 | # Extract the server and database names from the connection string supplied by the execution environment 41 | connSplit <- helper_parseConnectionString(helper_getSetting("connectionStringDBO")) 42 | 43 | # 44 | # --- dbo user install and remove tests --- 45 | # 46 | 47 | # 48 | # remove packages from both public scope 49 | # 50 | cat("\nTEST: removing packages from public scope...\n") 51 | try(sql_remove.packages( connectionStringDBO, packageName, scope = 'public', owner = owner, verbose = TRUE)) 52 | helper_checkPackageStatusFind(connectionStringDBO, packageName, FALSE) 53 | 54 | # 55 | # install package in public scope 56 | # 57 | cat("\nTEST: dbo: installing packages in public scope...\n") 58 | sql_install.packages( connectionStringDBO, packageName, scope = 'public', owner = owner, verbose = TRUE) 59 | helper_checkPackageStatusFind(connectionStringDBO, packageName, TRUE) 60 | 61 | # 62 | # uninstall package in public scope 63 | # 64 | cat("\nTEST: dbo: removing packages from public scope...\n") 65 | sql_remove.packages( connectionStringDBO, packageName, scope = 'public', owner = owner, verbose = TRUE) 66 | helper_checkPackageStatusFind(connectionStringDBO, packageName, FALSE) 67 | }, finally={ 68 | helper_cleanAllExternalLibraries(connectionStringDBO) 69 | }) 70 | }) 71 | 72 | test_that( "package install and remove, PRIVATE scope", 73 | { 74 | packageName <- c("A3") 75 | 76 | # 77 | # --- AirlineUser user install and remove tests --- 78 | # 79 | connectionStringAirlineUser <- helper_getSetting("connectionStringAirlineUser") 80 | 81 | tryCatch({ 82 | # 83 | # remove packages from private scope 84 | # 85 | cat("TEST: AirlineUser: removing packages from private 
scope...\n") 86 | try(sql_remove.packages( connectionStringAirlineUser, packageName, scope = 'private', verbose = TRUE)) 87 | helper_checkPackageStatusFind(connectionStringAirlineUser, packageName, FALSE) 88 | 89 | # 90 | # install package in private scope 91 | # 92 | cat("TEST: AirlineUser: installing packages in private scope...\n") 93 | sql_install.packages( connectionStringAirlineUser, packageName, scope = 'private', verbose = TRUE) 94 | helper_checkPackageStatusFind(connectionStringAirlineUser, packageName, TRUE) 95 | 96 | # 97 | # uninstall package in private scope 98 | # 99 | cat("TEST: AirlineUser: removing packages from private scope...\n") 100 | sql_remove.packages( connectionStringAirlineUser, packageName, scope = 'private', verbose = TRUE) 101 | helper_checkPackageStatusFind(connectionStringAirlineUser, packageName, FALSE) 102 | }, finally={ 103 | helper_cleanAllExternalLibraries(connectionStringAirlineUser) 104 | }) 105 | }) 106 | -------------------------------------------------------------------------------- /Python/sqlmlutils/sqlqueryexecutor.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import pyodbc 5 | import sys 6 | 7 | from pandas import DataFrame 8 | 9 | from .connectioninfo import ConnectionInfo 10 | from .sqlbuilder import SQLBuilder 11 | from .sqlbuilder import STDOUT_COLUMN_NAME, STDERR_COLUMN_NAME 12 | 13 | """This module is used to actually execute sql queries. It uses the pyodbc module under the hood. 14 | 15 | It is mostly setup to work with SQLBuilder objects as defined in sqlbuilder. 16 | """ 17 | 18 | 19 | # This function is best used to execute a one-off query 20 | # (the SQL connection is closed after the query completes). 21 | # If you need to keep the SQL connection open in between queries, you can use the SQLQueryExecutor class below. 
def execute_query(builder, connection: ConnectionInfo, out_file: str = None):
    """Execute a builder's script as a one-off query.

    A connection is opened for this call only and is closed as soon as the
    query completes. To keep a connection open between queries, use
    SQLQueryExecutor as a context manager instead.

    :param builder: object providing ``base_script`` and ``params``.
    :param connection: connection settings; its ``connection_string`` is
        handed to pyodbc.
    :param out_file: when not None, the script is appended to this file
        instead of being executed.
    :return: the ``(DataFrame, output_params)`` tuple produced by
        SQLQueryExecutor.execute_query.
    """
    with SQLQueryExecutor(connection=connection) as executor:
        return executor.execute(builder, out_file=out_file)


def execute_raw_query(conn: ConnectionInfo, query, params=()):
    """Execute a raw query string on a short-lived connection.

    :param conn: connection settings used to open the connection.
    :param query: SQL text, with ``?`` placeholders for ``params``.
    :param params: values bound to the placeholders.
    :return: the ``(DataFrame, output_params)`` tuple produced by
        SQLQueryExecutor.execute_query.
    """
    with SQLQueryExecutor(connection=conn) as executor:
        return executor.execute_query(query, params)


class SQLQueryExecutor:
    """Keep a SQL connection open in order to execute one or more queries.

    This class implements the basic context manager paradigm: the pyodbc
    connection and cursor are created in ``__enter__`` and the connection is
    closed in ``__exit__``.
    """

    def __init__(self, connection: ConnectionInfo):
        self._connection = connection

    def execute(self, builder: SQLBuilder, out_file=None):
        """Run ``builder.base_script`` with ``builder.params``; see execute_query."""
        return self.execute_query(builder.base_script, builder.params, out_file=out_file)

    def execute_query(self, query, params, out_file=None):
        """Execute a query, or append it to a script file when out_file is given.

        :param query: SQL text, with ``?`` placeholders for ``params``.
        :param params: values for the placeholders, or None for no parameters.
        :param out_file: when not None, nothing is sent to the server; the
            query with its parameters inlined is appended to this file.
        :return: ``(df, output_params)`` where ``df`` is a DataFrame built from
            the first result set (empty if there is none) and
            ``output_params`` is a dict built from the first row of the last
            non-empty following result set, or None.
        :raises RuntimeError: wrapping any exception raised while writing the
            script or talking to the server; the original exception is kept
            as ``__cause__``.
        """
        df = DataFrame()
        output_params = None

        try:
            if out_file is not None:
                self._write_script(query, params, out_file)
            else:
                if params is not None:
                    self._cursor.execute(query, params)
                else:
                    self._cursor.execute(query)

                # Get the first resultset (OutputDataSet)
                #
                if self._cursor.description is not None:
                    column_names, rows = self._fetch_result_set()
                    df = DataFrame(rows, columns=column_names)
                    # An empty result set has no row to read stdout/stderr from.
                    if rows and STDOUT_COLUMN_NAME in column_names:
                        self.extract_output(dict(zip(column_names, rows[0])))

                # Get output parameters
                #
                while self._cursor.nextset():
                    try:
                        if self._cursor.description is not None:
                            column_names, rows = self._fetch_result_set()
                            if not rows:
                                # Nothing to read; keep what we already have.
                                continue
                            output_params = dict(zip(column_names, rows[0]))

                            if STDOUT_COLUMN_NAME in column_names:
                                self.extract_output(output_params)

                    except pyodbc.ProgrammingError:
                        continue

        except Exception as e:
            raise RuntimeError("Error in SQL Execution: " + str(e)) from e

        return df, output_params

    def _fetch_result_set(self):
        """Return ``(column_names, rows)`` for the cursor's current result set."""
        column_names = [element[0] for element in self._cursor.description]
        rows = [tuple(t) for t in self._cursor.fetchall()]
        return column_names, rows

    @staticmethod
    def _write_script(query, params, out_file):
        """Append the query, with its parameters inlined, to out_file."""
        with open(out_file, "a") as f:
            if params is not None:
                # The placeholders are filled with %-formatting below, so any
                # literal "%" already in the query has to be escaped first.
                template = query.replace("%", "%%")

                # Convert binary data to hex so user can run as a script
                #
                if isinstance(params, (bytes, bytearray)):
                    params = "0x" + params.hex()
                    script = template.replace("?", "%s")
                else:
                    script = template.replace("?", "N'%s'")

                f.write(script % params)
            else:
                f.write(query)
            f.write("GO\n")
            f.write("-----------------------------")

    def __enter__(self):
        self._cnxn = pyodbc.connect(self._connection.connection_string,
                                    autocommit=True)
        self._cursor = self._cnxn.cursor()
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self._cnxn.close()

    def extract_output(self, output_params: dict):
        """Pop the stdout/stderr columns from output_params and print them.

        The stdout value goes to standard output and the stderr value to
        standard error; a missing or None value is skipped. The dict passed
        in is modified in place.
        """
        out = output_params.pop(STDOUT_COLUMN_NAME, None)
        err = output_params.pop(STDERR_COLUMN_NAME, None)
        if out is not None:
            print(out)
        if err is not None:
            print(err, file=sys.stderr)

# --------------------------------------------------------------------------------
# /R/tests/testthat/test.sqlPackage.toplevel.R:
# --------------------------------------------------------------------------------
# Copyright(c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
3 | 4 | library(sqlmlutils) 5 | library(testthat) 6 | 7 | context("Tests for sqlmlutils package management top level") 8 | 9 | test_that("package top level install and remove", 10 | { 11 | connectionStringAirlineUserdbowner <- helper_getSetting("connectionStringAirlineUserdbowner") 12 | scope <- "private" 13 | 14 | tryCatch({ 15 | # 16 | # check package management is installed 17 | # 18 | cat("\nINFO: checking remote lib paths...\n") 19 | helper_checkSqlLibPaths(connectionStringAirlineUserdbowner, 1) 20 | packageName <- c("A3") 21 | dependentPackageName <- "xtable" 22 | dependentPackage2 <- "pbapply" 23 | 24 | # 25 | # remove old packages if any and verify they aren't there 26 | # 27 | if (helper_remote.require( connectionStringAirlineUserdbowner, packageName) == TRUE) 28 | { 29 | cat("\nINFO: removing package:", packageName,"\n") 30 | sql_remove.packages( connectionStringAirlineUserdbowner, packageName, verbose = TRUE, scope = scope) 31 | } 32 | 33 | # Make sure dependent package does not exist on its own 34 | # 35 | if (helper_remote.require(connectionStringAirlineUserdbowner, dependentPackageName) == TRUE) 36 | { 37 | cat("\nINFO: removing package:", dependentPackageName,"\n") 38 | sql_remove.packages( connectionStringAirlineUserdbowner, dependentPackageName, verbose = TRUE, scope = scope) 39 | } 40 | 41 | if (helper_remote.require(connectionStringAirlineUserdbowner, dependentPackage2) == TRUE) 42 | { 43 | cat("\nINFO: removing package:", dependentPackage2,"\n") 44 | sql_remove.packages( connectionStringAirlineUserdbowner, dependentPackage2, verbose = TRUE, scope = scope) 45 | } 46 | 47 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, packageName, FALSE) 48 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackageName, FALSE) 49 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackage2, FALSE) 50 | 51 | # 52 | # install the package with its dependencies and check if its present 
53 | # 54 | output <- try(capture.output(sql_install.packages( connectionStringAirlineUserdbowner, packageName, verbose = TRUE, scope = scope))) 55 | expect_true(!inherits(output, "try-error")) 56 | expect_equal(1, sum(grepl("Successfully installed packages on SQL server", output))) 57 | 58 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, packageName, TRUE) 59 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackageName, TRUE) 60 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackage2, TRUE) 61 | 62 | # Promote one dependent package to top most by explicit installation 63 | # 64 | cat("\nTEST: promote dependent package to top most by explicit installation...\n") 65 | output <- try(capture.output(sql_install.packages( connectionStringAirlineUserdbowner, dependentPackageName, verbose = TRUE, scope = scope))) 66 | expect_true(!inherits(output, "try-error")) 67 | expect_equal(1, sum(grepl("Successfully attributed packages on SQL server", output))) 68 | 69 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackageName, TRUE) 70 | 71 | 72 | # Remove main package and make sure the dependent, now turned top most, does not being removed 73 | # 74 | cat("\nTEST: remove main package and make sure the dependent, now turned top most, is not removed...\n") 75 | output <- try(capture.output(sql_remove.packages( connectionStringAirlineUserdbowner, packageName, verbose = TRUE, scope = scope))) 76 | expect_true(!inherits(output, "try-error")) 77 | expect_equal(1, sum(grepl("Successfully removed packages from SQL server", output))) 78 | 79 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, packageName, FALSE) 80 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackage2, FALSE) 81 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackageName, TRUE) 82 | 83 | # Make sure promoted 
dependent package can be removed 84 | # 85 | cat("\nTEST: remove dependent package previously promoted to top most...\n") 86 | output <- try(capture.output(sql_remove.packages( connectionStringAirlineUserdbowner, dependentPackageName, verbose = TRUE, scope = scope))) 87 | expect_true(!inherits(output, "try-error")) 88 | expect_equal(1, sum(grepl("Successfully removed packages from SQL server", output))) 89 | 90 | helper_checkPackageStatusRequire( connectionStringAirlineUserdbowner, dependentPackageName, FALSE) 91 | }, finally={ 92 | helper_cleanAllExternalLibraries(connectionStringAirlineUserdbowner) 93 | }) 94 | }) 95 | -------------------------------------------------------------------------------- /Python/tests/execute_function_test.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import io 5 | import os 6 | import pytest 7 | 8 | from contextlib import redirect_stdout, redirect_stderr 9 | from pandas import DataFrame 10 | 11 | from sqlmlutils import ConnectionInfo, SQLPythonExecutor 12 | from conftest import driver, server, database, uid, pwd 13 | 14 | connection = ConnectionInfo(driver=driver, 15 | server=server, 16 | database=database, 17 | uid=uid, 18 | pwd=pwd) 19 | 20 | current_dir = os.path.dirname(__file__) 21 | script_dir = os.path.join(current_dir, "scripts") 22 | 23 | sqlpy = SQLPythonExecutor(connection) 24 | 25 | def test_with_named_args(): 26 | def func_with_args(arg1, arg2): 27 | print(arg1) 28 | return arg2 29 | 30 | output = io.StringIO() 31 | with redirect_stderr(output), redirect_stdout(output): 32 | res = sqlpy.execute_function_in_sql(func_with_args, arg1="str1", arg2="str2") 33 | 34 | assert "str1" in output.getvalue() 35 | assert res == "str2" 36 | 37 | 38 | def test_with_order_args(): 39 | def func_with_order_args(arg1: int, arg2: float): 40 | return arg1 / arg2 41 | 42 | res = 
sqlpy.execute_function_in_sql(func_with_order_args, 2, 3.0) 43 | assert res == 2 / 3.0 44 | res = sqlpy.execute_function_in_sql(func_with_order_args, 3.0, 2) 45 | assert res == 3 / 2.0 46 | 47 | 48 | def test_return(): 49 | def func_with_return(): 50 | return "returned!" 51 | 52 | res = sqlpy.execute_function_in_sql(func_with_return) 53 | assert res == func_with_return() 54 | 55 | 56 | @pytest.mark.skip(reason="Do we capture warnings?") 57 | def test_warning(): 58 | def func_with_warning(): 59 | import warnings 60 | warnings.warn("WARNING!") 61 | 62 | res = sqlpy.execute_function_in_sql(func_with_warning) 63 | assert res is None 64 | 65 | 66 | def test_with_internal_func(): 67 | def func_with_internal_func(): 68 | def func2(arg1, arg2): 69 | return arg1 + arg2 70 | 71 | return func2("Suc", "cess") 72 | 73 | res = sqlpy.execute_function_in_sql(func_with_internal_func) 74 | assert res == "Success" 75 | 76 | 77 | @pytest.mark.skip(reason="Cannot currently return a function") 78 | def test_return_func(): 79 | def func2(arg1, arg2): 80 | return arg1 + arg2 81 | 82 | def func_returns_func(): 83 | def func2(arg1, arg2): 84 | return arg1 + arg2 85 | 86 | return func2 87 | 88 | res = sqlpy.execute_function_in_sql(func_returns_func) 89 | assert res == func2 90 | 91 | 92 | @pytest.mark.skip(reason="Cannot currently return a function outside of environment") 93 | def test_return_func(): 94 | def func2(arg1, arg2): 95 | return arg1 + arg2 96 | 97 | def func_returns_func(): 98 | return func2 99 | 100 | res = sqlpy.execute_function_in_sql(func_returns_func) 101 | assert res == func2 102 | 103 | 104 | def test_with_no_args(): 105 | def func_with_no_args(): 106 | return 107 | 108 | res = sqlpy.execute_function_in_sql(func_with_no_args) 109 | 110 | assert res is None 111 | 112 | 113 | def test_with_data_frame(): 114 | def func_return_df(in_df): 115 | return in_df 116 | 117 | res = sqlpy.execute_function_in_sql(func_return_df, 118 | input_data_query="SELECT TOP 10 * FROM 
airline5000") 119 | 120 | assert type(res) == DataFrame 121 | assert res.shape == (10, 30) 122 | 123 | 124 | def test_with_variables(): 125 | def func_with_variables(s): 126 | print(s) 127 | 128 | output = io.StringIO() 129 | with redirect_stderr(output), redirect_stdout(output): 130 | sqlpy.execute_function_in_sql(func_with_variables, s="Hello") 131 | 132 | assert "Hello" in output.getvalue() 133 | 134 | output = io.StringIO() 135 | with redirect_stderr(output), redirect_stdout(output): 136 | var_s = "World" 137 | sqlpy.execute_function_in_sql(func_with_variables, s=var_s) 138 | 139 | assert 'World' in output.getvalue() 140 | 141 | 142 | def test_execute_query(): 143 | res = sqlpy.execute_sql_query("SELECT TOP 10 * FROM airline5000") 144 | 145 | assert type(res) == DataFrame 146 | assert res.shape == (10, 30) 147 | 148 | 149 | def test_execute_script(): 150 | path = os.path.join(script_dir, "exec_script.py") 151 | 152 | output = io.StringIO() 153 | with redirect_stderr(output), redirect_stdout(output): 154 | res = sqlpy.execute_script_in_sql(path_to_script=path, 155 | input_data_query="SELECT TOP 10 * FROM airline5000") 156 | 157 | assert "HelloWorld" in output.getvalue() 158 | assert res is None 159 | 160 | with pytest.raises(FileNotFoundError): 161 | sqlpy.execute_script_in_sql(path_to_script="NonexistentScriptPath", 162 | input_data_query="SELECT TOP 10 * FROM airline5000") 163 | 164 | 165 | def test_stderr(): 166 | def print_to_stderr(): 167 | import sys 168 | sys.stderr.write("Error!") 169 | 170 | output = io.StringIO() 171 | with redirect_stderr(output), redirect_stdout(output): 172 | sqlpy.execute_function_in_sql(print_to_stderr) 173 | 174 | assert "Error!" in output.getvalue() 175 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/packagesqlbuilder.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 
# Licensed under the MIT license.

import pyodbc

from sqlmlutils.sqlbuilder import SQLBuilder
from sqlmlutils.packagemanagement.scope import Scope


class CreateLibraryBuilder(SQLBuilder):
    """Build the T-SQL that drops and re-creates an external library from a package file."""

    def __init__(self, pkg_name: str, pkg_filename: str, scope: Scope, language_name: str):
        # The name is normalized once here so every statement in base_script
        # refers to the same library identifier.
        self._name = clean_library_name(pkg_name)
        self._language_name = language_name
        self._filename = pkg_filename
        self._scope = scope

    @property
    def params(self):
        """Return the package file's bytes wrapped in ``pyodbc.Binary``.

        The file is read in full on every access. The value is the single
        parameter for the ``?`` placeholder (``CONTENT = ?``) in base_script.
        """
        with open(self._filename, "rb") as f:
            package_bits = f.read()
        pkgdatastr = pyodbc.Binary(package_bits)
        return pkgdatastr

    @property
    def base_script(self) -> str:
        """Return the script: best-effort DROP, CREATE EXTERNAL LIBRARY, then a dummy SPEES call."""
        authorization = _get_authorization(self._scope)
        dummy_spees = _get_dummy_spees(self._language_name)

        # The DROP sits in a TRY block with an empty CATCH, so a failed drop
        # is ignored and the CREATE below still runs.
        return """
set NOCOUNT on
-- Drop the library if it exists
BEGIN TRY
DROP EXTERNAL LIBRARY [{sqlpkgname}] {authorization}
END TRY
BEGIN CATCH
END CATCH

-- Create the library
CREATE EXTERNAL LIBRARY [{sqlpkgname}] {authorization}
FROM (CONTENT = ?) WITH (LANGUAGE = '{language_name}');

-- Dummy SPEES
{dummy_spees}
""".format(
            sqlpkgname=self._name,
            authorization=authorization,
            dummy_spees=dummy_spees,
            language_name=self._language_name
        )


class CheckLibraryBuilder(SQLBuilder):
    """Build the T-SQL and the embedded script that check a package is present in a scope."""

    def __init__(self, pkg_name: str, scope: Scope, language_name: str):
        self._name = clean_library_name(pkg_name)
        self._language_name = language_name
        self._scope = scope

        # Names of the environment variables the embedded script reads to
        # find the private and public library folders; they differ between
        # "Python" and every other language name.
        if self._language_name == "Python":
            self._private_path_env = "MRS_EXTLIB_USER_PATH"
            self._public_path_env = "MRS_EXTLIB_SHARED_PATH"
        else:
            self._private_path_env = "PRIVATELIBPATH"
            self._public_path_env = "PUBLICLIBPATH"

    @property
    def params(self) -> str:
        """Return the script text bound to ``@script = ?`` in base_script.

        The script lists the files in the library folder of the requested
        scope and asserts that one of them matches the package name: the
        name itself, ``<name>.py``, or an ``-...egg`` / ``-...dist-info``
        entry. It falls back to the shared folder when the user-path
        environment variable is empty.
        """
        # NOTE(review): the line layout of this string is reconstructed from
        # the numbered listing; verify indentation against the repository.
        return """
import os
import re
_ENV_NAME_USER_PATH = "{private_path_env}"
_ENV_NAME_SHARED_PATH = "{public_path_env}"

def _is_dist_info_file(name, file):
    return re.match(name + r"-.*egg", file) or re.match(name + r"-.*dist-info", file)

def _is_package_match(package_name, file):
    package_name = package_name.lower()
    file = file.lower()
    return file == package_name or file == package_name + ".py" or \
        _is_dist_info_file(package_name, file) or \
        ("-" in package_name and
         (package_name.split("-")[0] == file or _is_dist_info_file(package_name.replace("-", "_"), file)))

def package_files_in_scope(scope="private"):
    envdir = _ENV_NAME_SHARED_PATH if scope == "public" or os.environ.get(_ENV_NAME_USER_PATH, "") == "" \
        else _ENV_NAME_USER_PATH
    path = os.environ.get(envdir, "")
    if os.path.isdir(path):
        return os.listdir(path)
    return []

def package_exists_in_scope(sql_package_name: str, scope=None) -> bool:
    if scope is None:
        # default to user path for every user but DBOs
        scope = "public" if (os.environ.get(_ENV_NAME_USER_PATH, "") == "") else "private"
    package_files = package_files_in_scope(scope)
    return any([_is_package_match(sql_package_name, package_file) for package_file in package_files])

# Check that the package exists in scope.
# For some reason this check works but there is a bug in pyODBC when asserting this is True.
assert package_exists_in_scope("{name}", "{scope}") != False
""".format(private_path_env=self._private_path_env,
           public_path_env=self._public_path_env,
           name=self._name,
           scope=self._scope._name)

    @property
    def base_script(self) -> str:
        """Return T-SQL that runs the check script and prints the outcome.

        On failure the CATCH block prints a message and re-throws, so the
        error still reaches the caller.
        """
        return """
-- Check to make sure the package was installed
BEGIN TRY
    EXEC sp_execute_external_script
    @language = N'{language_name}',
    @script = ?
    print('Package successfully installed.')
END TRY
BEGIN CATCH
    print('Package installation failed.');
    THROW;
END CATCH
""".format(language_name = self._language_name)


class DropLibraryBuilder(SQLBuilder):
    """Build the T-SQL that drops an external library, followed by a dummy SPEES call."""

    # No ``params`` is defined here, so whatever SQLBuilder provides is used;
    # SQLBuilder is not visible in this file.
    def __init__(self, sql_package_name: str, scope: Scope, language_name: str):
        self._name = clean_library_name(sql_package_name)
        self._language_name = language_name
        self._scope = scope

    @property
    def base_script(self) -> str:
        """Return the DROP EXTERNAL LIBRARY statement plus the dummy SPEES call."""
        return """
DROP EXTERNAL LIBRARY [{name}] {auth}

{dummy_spees}
""".format(
            name=self._name,
            auth=_get_authorization(self._scope),
            dummy_spees=_get_dummy_spees(self._language_name)
        )

def clean_library_name(pkgname: str) -> str:
    """Return pkgname with every "-" replaced by "_" and lower-cased."""
    return pkgname.replace("-", "_").lower()


def _get_authorization(scope: Scope) -> str:
    """Return ``AUTHORIZATION dbo`` for the public scope and an empty string otherwise."""
    return "AUTHORIZATION dbo" if scope == Scope.public_scope() else ""


def _get_dummy_spees(language_name: str) -> str:
    """Return an sp_execute_external_script call with an empty script for language_name.

    The builders append it after CREATE/DROP EXTERNAL LIBRARY.
    NOTE(review): presumably this makes the server act on the library change
    right away; confirm against the SQL Server documentation.
    """
    return """
EXEC sp_execute_external_script
@language = N'{language_name}',
@script = N''
""".format(language_name = language_name)
--------------------------------------------------------------------------------
/R/tests/testthat/helper-sqlPackage.R:
--------------------------------------------------------------------------------
# Copyright(c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

library(sqlmlutils)
library(methods)
library(testthat)

# Cache of the connection strings used by the tests.
# Stays NULL until the first helper_getSetting() call builds it.
Settings <- NULL

#
# Parse a connection string (e.g. "Server=localhost;Database=AirlineTestDB;Uid=AirlineUserdbowner;Pwd=****")
# into a named list with one element per "key=value" segment.
#
# Each segment is split on its FIRST '=' only, so a value that itself contains '='
# (a password, for instance) is returned intact. Empty segments are ignored and a
# segment without any '=' yields an empty-string value.
#
helper_parseConnectionString <- function(connectionString)
{
    segments <- unlist(strsplit(connectionString, ";", fixed = TRUE))
    segments <- segments[nzchar(segments)]

    # position of the first '=' in each segment (-1 when there is none)
    eqPos <- as.integer(regexpr("=", segments, fixed = TRUE))
    hasValue <- eqPos > 0

    keys <- segments
    keys[hasValue] <- substr(segments[hasValue], 1, eqPos[hasValue] - 1)

    values <- rep("", length(segments))
    values[hasValue] <- substr(segments[hasValue], eqPos[hasValue] + 1, nchar(segments[hasValue]))

    params <- as.list(values)
    names(params) <- keys
    params
}

#
# Return the connection string stored under 'key'.
# Known keys: connectionStringDBO, connectionStringAirlineUserdbowner, connectionStringAirlineUser.
#
# On first use the strings are derived from options('TestArgs'): the DBO string is taken
# as-is, the two AirlineUser strings reuse its Driver/Server/Database with their own
# Uid and password. Stops with an error for an unknown key.
#
helper_getSetting <- function(key)
{
    if(is.null(Settings))
    {
        testArgs <- options('TestArgs')$TestArgs
        connectionStringDBO <- testArgs$connectionString
        connSplit <- helper_parseConnectionString( connectionStringDBO )
        connectionStringAirlineUserdbowner <- sprintf("Driver=%s;Server=%s;Database=%s;Uid=AirlineUserdbowner;Pwd=%s", connSplit$Driver, connSplit$Server, connSplit$Database, testArgs$pwdAirlineUserdbowner)
        connectionStringAirlineUser <- sprintf("Driver=%s;Server=%s;Database=%s;Uid=AirlineUser;Pwd=%s", connSplit$Driver, connSplit$Server, connSplit$Database, testArgs$pwdAirlineUser)

        # '<<-' so the cache outlives this call
        Settings <<- c(connectionStringDBO = connectionStringDBO,
                       connectionStringAirlineUserdbowner = connectionStringAirlineUserdbowner,
                       connectionStringAirlineUser = connectionStringAirlineUser
                      )

    }

    if( key %in% names(Settings)) return (Settings[[key]])
    stop(sprintf("setting not found: (%s)", key))
}

#
# TRUE when the R session evaluating this function reports a "linux-gnu" OS.
# Relies on Revo.version being defined in that session.
#
helper_isLinux <- function()
{
    return(Revo.version$os == "linux-gnu");
}

#
# Runs helper_isLinux on the server reached through the DBO connection string.
#
helper_isServerLinux <- function()
{
    return (sqlmlutils:::sqlRemoteExecuteFun(helper_getSetting("connectionStringDBO"), helper_isLinux, languageName="R"))
}

#
# Remote require
#
helper_remote.require <- function(connectionString, packageName)
{
    return (suppressWarnings((sqlmlutils:::sqlRemoteExecuteFun(connectionString, require, package = packageName, useRemoteFun = TRUE , languageName="R"))))
}

#
# Expect that remote require() of 'packageName' returns 'expectedInstallStatus'.
#
helper_checkPackageStatusRequire <- function(connectionString, packageName, expectedInstallStatus)
{
    requireStatus <- helper_remote.require( connectionString, packageName)
    msg <- sprintf(" %s is present : %s (expected=%s)\r\n", packageName, requireStatus, expectedInstallStatus)
    cat("\nCHECK:", msg)
    expect_equal(expectedInstallStatus, requireStatus, info=msg)
}

#
# Remote find.package
#
helper_remote.find.package <- function(connectionString, packageName)
{
    findResult <- sqlmlutils:::sqlRemoteExecuteFun(connectionString, find.package, package = packageName, quiet = TRUE, useRemoteFun = TRUE, languageName="R" )

    return (is.character(findResult) && (length(findResult) > 0))
}

#
# Expect that remote find.package() of 'packageName' matches 'expectedInstallStatus'.
#
helper_checkPackageStatusFind <- function(connectionString, packageName, expectedInstallStatus)
{
    findStatus <- helper_remote.find.package(connectionString, packageName)
    msg <- sprintf(" %s is present : %s (expected=%s)\r\n", packageName, findStatus, expectedInstallStatus)
    cat("\nCHECK:", msg)
    # pass the message by name, as helper_checkPackageStatusRequire does; given
    # positionally it is forwarded to the comparison instead of being used as info
    expect_equal(expectedInstallStatus, findStatus, info=msg)
}

#
# Print the remote .libPaths() and expect at least 'minimumCount' of them.
#
helper_checkSqlLibPaths <- function(connectionString, minimumCount)
{
    sqlLibPaths = sqlmlutils:::sqlRemoteExecuteFun(connectionString, .libPaths, useRemoteFun = TRUE, languageName="R" )
    # one "INFO: lib paths = <path>" line per path
    cat(paste0( "INFO: lib paths = ", sqlLibPaths, collapse = "\r\n"), "\r\n", sep = "")
    expect_true(length(sqlLibPaths) >= minimumCount)
}

#
# Log and execute a DDL statement.
# Only the first 256 characters of the statement are logged, and the password inside
# the connection string is masked so that credentials do not end up in test logs.
#
helper_ExecuteSQLDDL <- function(connectionString, sqlDDL)
{
    loggedConnectionString <- gsub("((?:Pwd|Password)\\s*=)[^;]*", "\\1****", connectionString, ignore.case = TRUE, perl = TRUE)
    cat(sprintf("\nINFO: executing: sqlDDL=\'%s\', connectionString=\'%s\'.\r\n", substr(sqlDDL, 1, 256), loggedConnectionString))

    sqlmlutils:::execute(connectionString, sqlDDL)
}

#
# Issue CREATE EXTERNAL LIBRARY for 'packageName'.
# 'content' starting with "0x" is sent as a binary literal, anything else as a quoted string.
# 'authorization', when given, is added as an AUTHORIZATION clause.
#
helper_CreateExternalLibrary <- function(connectionString, packageName, authorization=NULL, content, languageName="R")
{
    # 1. issue 'CREATE EXTERNAL LIBRARY'
    createExtLibDDLString = paste0("CREATE EXTERNAL LIBRARY [", packageName, "]")
    if (!is.null(authorization))
    {
        createExtLibDDLString = paste0(createExtLibDDLString, " AUTHORIZATION ", authorization)
    }

    if (substr(content, 1, 2) == "0x")
    {
        createExtLibDDLString = paste0(createExtLibDDLString, " FROM (content = ", content, ") WITH (LANGUAGE = '", languageName,"')")
    }
    else
    {
        createExtLibDDLString = paste0(createExtLibDDLString, " FROM (content = '", content, "') WITH (LANGUAGE = '", languageName,"')")
    }

    helper_ExecuteSQLDDL(connectionString = connectionString, sqlDDL = createExtLibDDLString)
}

#
# Run a no-op sp_execute_external_script in the given language.
#
helper_callDummySPEES <- function(connectionString, languageName="R")
{
    cat(sprintf("\nINFO: call dummy sp_execute_external_script to trigger install.\r\n"))
    speesStr = paste0("EXECUTE sp_execute_external_script
                      @LANGUAGE = N'", languageName,"',
                      @SCRIPT = N'invisible(NULL)'")

    sqlmlutils:::execute(connectionString, speesStr)
}

#
# Returns list with 'value' and 'warning'
# In the case of a warning returns the computed result in 'value' and the warning message in 'warning'
# In the case of error returns error message in 'value' and marks it of class 'try-error'
#
helper_tryCatchValue <- function(expr)
{
    warningSave <- c()
    warningHandler <- function(w)
    {
        # collect the message, then resume evaluation of 'expr'
        warningSave <<- c(warningSave, w$message)
        invokeRestart("muffleWarning")
    }

    list( value = withCallingHandlers(
                      tryCatch(
                          expr,
                          error = function(e)
                          {
                              invisible(structure(conditionMessage(e), class = "try-error"))
                          }
                      ),
                      warning = warningHandler
                  ),
          warning = warningSave
    )
}

#
# Drop every external library registered for language 'R'.
# Names are bracket-quoted (as in helper_CreateExternalLibrary) so that names
# containing '.' such as data.table form a valid identifier.
#
helper_cleanAllExternalLibraries <- function(connectionString)
{
    libraryNames <- sqlmlutils:::execute(connectionString, "select * from sys.external_libraries where Language = 'R'")$name
    for(libraryName in libraryNames)
    {
        sqlmlutils:::execute(connectionString, paste0("DROP EXTERNAL LIBRARY [", libraryName, "]"))
    }
}

--------------------------------------------------------------------------------
/R/tests/testthat/test.executeInSqlTests.R:
--------------------------------------------------------------------------------
# Copyright(c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

library(testthat)
context("executeInSQL tests")

TestArgs <- options("TestArgs")$TestArgs
connection <- TestArgs$connectionString
scriptDir <- TestArgs$scriptDirectory

test_that("Test with named args",
{
    funcWithArgs <- function(arg1, arg2)
    {
        print(arg1)
        return(arg2)
    }
    expect_output(
        expect_equal(
            executeFunctionInSQL(connection, funcWithArgs, arg1="blah1", arg2="blah2"),
            "blah2"),
        "blah1"
    )
})

test_that("Test ordered arguments",
{
    funcNum <- function(arg1, arg2)
    {
        stopifnot(typeof(arg1) == "integer")
        stopifnot(typeof(arg2) == "double")
        return(arg1 / arg2)
    }
    expect_error(executeFunctionInSQL(connection, funcNum, 2))
    expect_equal(executeFunctionInSQL(connection, funcNum, as.integer(2), 3), 2/3)
    expect_equal(executeFunctionInSQL(connection, funcNum, as.integer(3), 2), 3/2)
})

test_that("Test Return",
{
    myReturnVal <- function()
    {
        return("returned!")
    }

    val = executeFunctionInSQL(connection, myReturnVal)
    expect_equal(val, myReturnVal())
})

test_that("Test Warning",
{
    printWarning <- function()
    {
        warning("testWarning")
        print("Hello, this returned")
    }
    expect_warning(
        expect_output(executeFunctionInSQL(connection, printWarning),
                      "Hello, this returned"),
        "testWarning")

})

test_that("Passing in a user defined function",
{
    func1 <- function()
    {
        func2 <- function()
        {
            return("Success")
        }

        return(func2())
    }

    expect_equal(executeFunctionInSQL(connection, func=func1), "Success")
})

test_that("Returning a function object",
{
    func2 <- function()
    {
        return("Success")
    }

    func1 <- function()
    {
        func2 <- function()
        {
            return("Success")
        }

        return(func2)
    }

    # Result of executeFunctionInSQL() will have different environment than func2.
    expect_equal(executeFunctionInSQL(connection, func=func1), func2, check.environment=FALSE)
})

test_that("Calling an object in the environment",
{
    skip("This doesn't work right now because we don't pass the whole environment")

    func2 <- function()
    {
        return("Success")
    }

    func1 <- function()
    {
        return(func2)
    }

    expect_equal(executeFunctionInSQL(connection, func=func1), func2)
})

test_that("No Parameters test",
{
    noReturn <- function() {}

    result = executeFunctionInSQL(connection, noReturn)
    expect_null(result)
})

test_that("Print, Warning, Return test",
{

    returnString <- function()
    {
        print("hello")
        warning("uh oh")
        return("bar")
    }

    expect_warning(expect_output(result <- executeFunctionInSQL(connection, returnString), "hello"), "uh oh")

    expect_equal(result , "bar")

})

test_that("Print, Warning, Return test, with args",
{
    returnVector <- function(a,b)
    {
        print("print")
        warning("uh oh")
        return(c(a,b))
    }

    expect_warning(expect_output(result <- executeFunctionInSQL(connection, returnVector, "foo", "bar"), "print"), "uh oh")

    expect_equal(result , c("foo","bar"))
})

test_that("Print, Warning, Error test",
{
    testError <- function()
    {
        print("print")
        warning("warning")
        stop("ERROR")
    }

    expect_error(
        expect_warning(
            expect_output(
                result <- executeFunctionInSQL(connection, testError),
                "print"),
            "warning"),
        "ERROR")
})

test_that("Return a DataFrame",
{

    returnDF <- function(a, b)
    {
        return(data.frame(x = c(foo=a,bar=b)))
    }

    result <- executeFunctionInSQL(connection, returnDF, "foo", 2)
    expect_equal(result, data.frame(x = c(foo="foo",bar=2)))
})

test_that("Return an input DataFrame",
{
    useInputDataSet <- function(in_df)
    {
        return(in_df)
    }

    result = executeFunctionInSQL(connection, useInputDataSet, inputDataQuery = "SELECT TOP 5 * FROM airline5000")
    expect_equal(nrow(result), 5)
    expect_equal(ncol(result), 30)

    useInputDataSet2 <- function(in_df, t1)
    {
        return(list(in_df, t1=t1))
    }

    result = executeFunctionInSQL(connection, useInputDataSet2, t1=5, inputDataQuery = "SELECT TOP 5 * FROM airline5000")
    expect_equal(result$t1, 5)
    expect_equal(ncol(result[[1]]), 30)

})

test_that("Variable test",
{

    printString <- function(str)
    {
        print(str)
    }

    expect_output(executeFunctionInSQL(connection, printString, str="Hello"), "Hello")
    test <- "World"
    expect_output(executeFunctionInSQL(connection, printString, str=test), test)
})

test_that("Query test",
{
    res <- executeSQLQuery(connectionString = connection, sqlQuery = "SELECT TOP 5 * FROM airline5000")
    expect_equal(nrow(res), 5)
    expect_equal(ncol(res), 30)
})

test_that("Script test",
{
    script <- file.path(scriptDir, 'script.txt')

    expect_warning(
        expect_output(
            res <- executeScriptInSQL(connectionString=connection, script=script, inputDataQuery = "SELECT TOP 5 * FROM airline5000"),
            "Hello"),
        "WARNING")

    expect_equal(nrow(res), 5)
    expect_equal(ncol(res), 30)

    script2 <- file.path(scriptDir, 'script2.txt')

    expect_output(res <- executeScriptInSQL(connection, script2), "Script path exists")
    expect_equal(res, 33)

    expect_error(res <- executeScriptInSQL(connection, "non-existent-script.txt"), regexp = "Script path doesn't exist")

})

--------------------------------------------------------------------------------
/R/README.md:
--------------------------------------------------------------------------------
# sqlmlutils

sqlmlutils is an R package to help execute R code on a SQL database (SQL Server or Azure SQL Database).
4 | 5 | # Installation 6 | 7 | ### Windows 8 | 9 | Download the latest release from https://github.com/microsoft/sqlmlutils/releases: 10 | 11 | From command prompt, run 12 | ``` 13 | R.exe -e "install.packages('odbc')" 14 | R.exe CMD INSTALL sqlmlutils_1.0.0.zip 15 | ``` 16 | OR 17 | To build a new package file and install, run 18 | ``` 19 | .\buildandinstall.cmd 20 | ``` 21 | 22 | ### Linux 23 | ``` 24 | R -e "install.packages('odbc')" 25 | R CMD INSTALL sqlmlutils_1.0.0.tar.gz 26 | ``` 27 | 28 | # Getting started 29 | 30 | Shown below are the important functions sqlmlutils provides: 31 | ```R 32 | connectionInfo # Create a connection string for connecting to the SQL database 33 | 34 | executeFunctionInSQL # Execute an R function inside the SQL database 35 | executeScriptInSQL # Execute an R script inside the SQL database 36 | executeSQLQuery # Execute a SQL query on the database and return the resultant table 37 | 38 | createSprocFromFunction # Create a stored procedure based on an R function inside the SQL database 39 | createSprocFromScript # Create a stored procedure based on an R script inside the SQL database 40 | checkSproc # Check whether a stored procedure exists in the SQL database 41 | dropSproc # Drop a stored procedure in the SQL database 42 | executeSproc # Execute a stored procedure in the SQL database 43 | 44 | sql_install.packages # Install packages in the SQL database 45 | sql_remove.packages # Remove packages from the SQL database 46 | sql_installed.packages # Enumerate packages that are installed on the SQL database 47 | ``` 48 | 49 | # Examples 50 | 51 | ### Execute In SQL 52 | ##### Execute an R function in database using sp_execute_external_script 53 | 54 | ```R 55 | library(sqlmlutils) 56 | 57 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
58 | # connection <- connectionInfo(driver= "ODBC Driver 13 for SQL Server", database="AirlineTestDB", uid = "username", pwd = "password") 59 | 60 | connection <- connectionInfo() 61 | 62 | funcWithArgs <- function(arg1, arg2) 63 | { 64 | return(c(arg1, arg2)) 65 | } 66 | result <- executeFunctionInSQL(connection, funcWithArgs, arg1="result1", arg2="result2") 67 | ``` 68 | 69 | ##### Generate a linear model without the data leaving the machine 70 | 71 | ```R 72 | library(sqlmlutils) 73 | 74 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 75 | # connection <- connectionInfo(driver= "ODBC Driver 13 for SQL Server", database="AirlineTestDB", uid = "username", pwd = "password") 76 | 77 | connection <- connectionInfo(database="AirlineTestDB") 78 | 79 | linearModel <- function(in_df, xCol, yCol) 80 | { 81 | lm(paste0(yCol, " ~ ", xCol), in_df) 82 | } 83 | 84 | model <- executeFunctionInSQL(connectionString = connection, func = linearModel, xCol = "CRSDepTime", yCol = "ArrDelay", 85 | inputDataQuery = "SELECT TOP 100 * FROM airline5000") 86 | model 87 | ``` 88 | 89 | ##### Execute a SQL Query from R 90 | 91 | ```R 92 | library(sqlmlutils) 93 | 94 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
95 | # connection <- connectionInfo(driver= "ODBC Driver 13 for SQL Server", database="AirlineTestDB", uid = "username", pwd = "password") 96 | 97 | connection <- connectionInfo(database="AirlineTestDB") 98 | 99 | dataTable <- executeSQLQuery(connectionString = connection, sqlQuery="SELECT TOP 100 * FROM airline5000") 100 | stopifnot(nrow(dataTable) == 100) 101 | stopifnot(ncol(dataTable) == 30) 102 | ``` 103 | 104 | ### Stored Procedures (Sproc) 105 | ##### Create and call a T-SQL stored procedure based on an R function 106 | 107 | ```R 108 | library(sqlmlutils) 109 | 110 | spPredict <- function(inputDataFrame) 111 | { 112 | library(RevoScaleR) 113 | model <- rxLinMod(ArrDelay ~ CRSDepTime, inputDataFrame) 114 | rxPredict(model, inputDataFrame) 115 | } 116 | 117 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 118 | # connection <- connectionInfo(driver= "ODBC Driver 13 for SQL Server", database="AirlineTestDB", uid = "username", pwd = "password") 119 | 120 | connection <- connectionInfo(database="AirlineTestDB") 121 | inputParams <- list(inputDataFrame = "Dataframe") 122 | 123 | name = "prediction" 124 | 125 | createSprocFromFunction(connectionString = connection, name = name, func = spPredict, inputParams = inputParams) 126 | stopifnot(checkSproc(connectionString = connection, name = name)) 127 | 128 | predictions <- executeSproc(connectionString = connection, name = name, inputDataFrame = "select ArrDelay, CRSDepTime, DayOfWeek from airline5000") 129 | stopifnot(nrow(predictions) == 5000) 130 | 131 | dropSproc(connectionString = connection, name = name) 132 | ``` 133 | 134 | ### Package Management 135 | 136 | ##### Package management with sqlmlutils is supported in SQL Server 2019 CTP 2.4 and later. 
137 | 138 | ##### Install and remove packages from the SQL database 139 | 140 | ```R 141 | library(sqlmlutils) 142 | 143 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 144 | # connection <- connectionInfo(driver= "ODBC Driver 13 for SQL Server", database="AirlineTestDB", uid = "username", pwd = "password") 145 | 146 | connection <- connectionInfo(database="AirlineTestDB") 147 | 148 | # install glue on sql database 149 | pkgs <- c("glue") 150 | sql_install.packages(connectionString = connection, pkgs, verbose = TRUE, scope="PUBLIC") 151 | 152 | # confirm glue is installed on sql database 153 | r <- sql_installed.packages(connectionString = connection, fields=c("Package", "LibPath", "Attributes", "Scope")) 154 | View(r) 155 | 156 | # use glue on sql database 157 | useLibraryGlueInSql <- function() 158 | { 159 | library(glue) 160 | 161 | name <- "Fred" 162 | age <- 50 163 | anniversary <- as.Date("1991-10-12") 164 | glue('My name is {name},', 165 | 'my age next year is {age + 1},', 166 | 'my anniversary is {format(anniversary, "%A, %B %d, %Y")}.') 167 | } 168 | 169 | result <- executeFunctionInSQL(connectionString = connection, func = useLibraryGlueInSql) 170 | print(result) 171 | 172 | # remove glue from sql database 173 | sql_remove.packages(connectionString = connection, pkgs, scope="PUBLIC") 174 | ``` 175 | 176 | ##### Install using a local file (instead of from CRAN) 177 | To install from a local file, add "repos=NULL" to sql_install.packages. 178 | Testing and uninstall can be done the same way as above. 179 | 180 | ```R 181 | library(sqlmlutils) 182 | 183 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
184 | # connection <- connectionInfo(driver= "ODBC Driver 13 for SQL Server", database="AirlineTestDB", uid = "username", pwd = "password") 185 | 186 | connection <- connectionInfo(database="AirlineTestDB") 187 | 188 | # install glue on sql database 189 | pkgPath <- "C:\\glue_1.3.0.zip" 190 | sql_install.packages(connectionString = connection, pkgPath, verbose = TRUE, scope="PUBLIC", repos=NULL) 191 | ``` 192 | 193 | # Notes for Developers 194 | 195 | ### Running the tests on a local machine 196 | 197 | 1. Make sure a SQL database with an updated ML Services R is running on localhost. 198 | 2. Restore the AirlineTestDB from the .bak file in this repo 199 | 3. Make sure Trusted (Windows) authentication works for connecting to the database 200 | 201 | ### Notable TODOs and open issues 202 | 203 | 1. Output Parameter execution did not work with RODBCext. Check whether they work with odbc package. 204 | -------------------------------------------------------------------------------- /Python/tests/package_management_pypi_test.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | import io 5 | import os 6 | import sys 7 | import pytest 8 | 9 | from contextlib import redirect_stdout 10 | from package_helper_functions import _get_sql_package_table, _get_package_names_list 11 | from sqlmlutils import SQLPythonExecutor, SQLPackageManager, Scope 12 | 13 | from conftest import connection, scope 14 | 15 | pyexecutor = SQLPythonExecutor(connection) 16 | pkgmanager = SQLPackageManager(connection) 17 | initial_list = _get_sql_package_table(connection)['name'] 18 | 19 | def _drop_all_ddl_packages(conn, scope): 20 | """Clean the external libraries - drop all packages""" 21 | pkgs = _get_sql_package_table(conn) 22 | if(len(pkgs.index) > 0 ): 23 | for pkg in pkgs['name']: 24 | if pkg not in initial_list: 25 | try: 26 | SQLPackageManager(conn)._drop_sql_package(pkg, scope=scope) 27 | except Exception as e: 28 | pass 29 | 30 | def _package_exists(module_name: str): 31 | """Check if a package exists""" 32 | mod = __import__(module_name) 33 | return mod is not None 34 | 35 | def _package_no_exist(module_name: str): 36 | """Check that a package does NOT exist""" 37 | import pytest 38 | with pytest.raises(Exception): 39 | __import__(module_name) 40 | return True 41 | 42 | def _check_version(module_name): 43 | """Get the version of an installed package""" 44 | module = __import__(module_name) 45 | return module.__version__ 46 | 47 | def test_install_different_names(): 48 | """Test installing a single package with different capitalization""" 49 | def useit(): 50 | import funcsigs 51 | print(funcsigs.__version__) 52 | 53 | try: 54 | pkgmanager.install("funcsigs==1.0.2") 55 | pyexecutor.execute_function_in_sql(useit) 56 | 57 | pkgmanager.uninstall("funcsigs") 58 | 59 | pkgmanager.install("funcSIGS==1.0.2") 60 | pyexecutor.execute_function_in_sql(useit) 61 | pkgmanager.uninstall("funcsigs") 62 | 63 | finally: 64 | _drop_all_ddl_packages(connection, scope) 65 | 66 | def test_install_version(): 67 | """Test the 'version' installation parameter""" 68 | package 
= "funcsigs" 69 | v = "1.0.1" 70 | 71 | def _package_version_exists(module_name: str, version: str): 72 | mod = __import__(module_name) 73 | return mod.__version__ == version 74 | 75 | try: 76 | pkgmanager.install(package, version=v) 77 | val = pyexecutor.execute_function_in_sql(_package_version_exists, module_name=package, version=v) 78 | assert val 79 | 80 | pkgmanager.uninstall(package) 81 | val = pyexecutor.execute_function_in_sql(_package_no_exist, module_name=package) 82 | assert val 83 | finally: 84 | _drop_all_ddl_packages(connection, scope) 85 | 86 | def test_dependency_resolution(): 87 | """Test that dependencies are installed with the main package""" 88 | package = "data" 89 | version = "0.4" 90 | 91 | try: 92 | pkgmanager.install(package, version=version, upgrade=True) 93 | val = pyexecutor.execute_function_in_sql(_package_exists, module_name=package) 94 | assert val 95 | 96 | pkgs = _get_package_names_list(connection) 97 | 98 | assert package in pkgs 99 | assert "funcsigs" in pkgs 100 | 101 | pkgmanager.uninstall(package) 102 | val = pyexecutor.execute_function_in_sql(_package_no_exist, module_name=package) 103 | assert val 104 | 105 | finally: 106 | _drop_all_ddl_packages(connection, scope) 107 | 108 | def test_no_upgrade_parameter(): 109 | """Test new version but no "upgrade" installation parameter""" 110 | try: 111 | pkg = "funcsigs" 112 | 113 | first_version = "1.0.1" 114 | second_version = "1.0.2" 115 | 116 | # Install package first so we can test upgrade param 117 | # 118 | pkgmanager.install(pkg, version=first_version) 119 | 120 | # Get sql packages 121 | # 122 | originalsqlpkgs = _get_sql_package_table(connection) 123 | 124 | # Try installing WITHOUT the upgrade parameter, it should fail 125 | # 126 | output = io.StringIO() 127 | with redirect_stdout(output): 128 | pkgmanager.install(pkg, upgrade=False, version=second_version) 129 | assert "exists on server. 
Set upgrade to True" in output.getvalue() 130 | 131 | # Make sure that the version we have on the server is still the first one 132 | # 133 | installed_version = pyexecutor.execute_function_in_sql(_check_version, pkg) 134 | assert first_version == installed_version 135 | 136 | # Make sure nothing excess was accidentally installed 137 | # 138 | sqlpkgs = _get_sql_package_table(connection) 139 | assert len(sqlpkgs) == len(originalsqlpkgs) 140 | 141 | finally: 142 | _drop_all_ddl_packages(connection, scope) 143 | 144 | def test_upgrade_parameter(): 145 | """Test the "upgrade" installation parameter""" 146 | try: 147 | pkg = "funcsigs" 148 | 149 | first_version = "1.0.1" 150 | second_version = "1.0.2" 151 | 152 | # Install package first so we can test upgrade param 153 | # 154 | pkgmanager.install(pkg, version=first_version) 155 | 156 | # Get sql packages 157 | # 158 | originalsqlpkgs = _get_sql_package_table(connection) 159 | 160 | oldversion = pyexecutor.execute_function_in_sql(_check_version, pkg) 161 | 162 | # Test installing WITH the upgrade parameter 163 | # 164 | pkgmanager.install(pkg, upgrade=True, version=second_version) 165 | 166 | afterinstall = _get_sql_package_table(connection) 167 | assert len(afterinstall) >= len(originalsqlpkgs) 168 | 169 | version = pyexecutor.execute_function_in_sql(_check_version, pkg) 170 | assert version > oldversion 171 | 172 | pkgmanager.uninstall(pkg) 173 | 174 | sqlpkgs = _get_sql_package_table(connection) 175 | assert len(sqlpkgs) == len(afterinstall) - 1 176 | 177 | finally: 178 | _drop_all_ddl_packages(connection, scope) 179 | 180 | @pytest.mark.skip(reason="Very slow test. 
Skip for CI.") 181 | def test_already_installed_popular_ml_packages(): 182 | """Test packages that are preinstalled, make sure they do not install anything extra""" 183 | installedpackages = ["numpy", "scipy", "pandas"] 184 | 185 | sqlpkgs = _get_sql_package_table(connection) 186 | for package in installedpackages: 187 | pkgmanager.install(package) 188 | newsqlpkgs = _get_sql_package_table(connection) 189 | assert len(sqlpkgs) == len(newsqlpkgs) 190 | 191 | def test_dependency_spec(): 192 | """Test that the DepedencyResolver handles ~= requirement spec. 193 | Also tests when package name and module name are different.""" 194 | package = "beautifulsoup4" 195 | version = "4.10.0" 196 | dependent = "soupsieve" 197 | module = "bs4" 198 | 199 | try: 200 | # Install the package and its dependencies 201 | # 202 | pkgmanager.install(package, version=version) 203 | val = pyexecutor.execute_function_in_sql(_package_exists, module_name=module) 204 | assert val 205 | 206 | pkgs = _get_package_names_list(connection) 207 | 208 | assert package in pkgs 209 | assert dependent in pkgs 210 | 211 | # Uninstall the top package only, not the dependencies 212 | # 213 | pkgmanager.uninstall(package) 214 | val = pyexecutor.execute_function_in_sql(_package_no_exist, module_name=module) 215 | assert val 216 | 217 | pkgs = _get_package_names_list(connection) 218 | 219 | assert package not in pkgs 220 | assert dependent in pkgs 221 | 222 | finally: 223 | _drop_all_ddl_packages(connection, scope) 224 | 225 | def test_installing_popular_ml_packages(): 226 | """Test a couple of popular ML packages""" 227 | newpackages = [ {'package': "TextBlob==0.17.1", 'module': 'textblob'}, {'package': "vocabulary==1.0.4", 'module': 'vocabulary'}] 228 | 229 | def checkit(pkgname): 230 | val = __import__(pkgname) 231 | return str(val) 232 | 233 | try: 234 | for package in newpackages: 235 | pkgmanager.install(package['package']) 236 | pyexecutor.execute_function_in_sql(checkit, pkgname=package['module']) 237 | 
finally: 238 | _drop_all_ddl_packages(connection, scope) 239 | 240 | -------------------------------------------------------------------------------- /.github/workflows/SQL2019.yaml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | # 6 | # See https://github.com/r-lib/actions/tree/master/examples#readme for 7 | # additional example workflows available for the R community. 8 | 9 | name: R 3.5 - Python 3.7 10 | 11 | on: 12 | push: 13 | branches: [ master ] 14 | pull_request: 15 | branches: [ master ] 16 | workflow_dispatch: 17 | 18 | jobs: 19 | SQLServer2019: 20 | runs-on: windows-2022 21 | 22 | env: 23 | # Define CI to skip some test case. 24 | CI: True 25 | r-version: "3.5.2" 26 | python-version: "3.7.1" 27 | sql-platform: "box" 28 | 29 | defaults: 30 | run: 31 | shell: cmd 32 | 33 | steps: 34 | # Set password in github env but dont display output. 35 | # Note for github action you need to use double %, when testing in a local VM 36 | # %i would be the correct format. 37 | - name: Generate password 38 | run: | 39 | setlocal enabledelayedexpansion 40 | set password=%random%%random%%random%qaZ~@ 41 | for /f "delims=" %%i in ('echo !password!') do echo dbPassword=%%i >> "%GITHUB_ENV%" 42 | 43 | # Downloads the R cabinet file for the initial release version of SQL Server 2019. 44 | # Note that we use the -L flag to allow for redirects. 45 | - name: Download Microsoft R Open cabinet file 46 | run: | 47 | set CurrentDir=%cd% 48 | curl -L https://go.microsoft.com/fwlink/?linkid=2085686 -o "%CurrentDir%/SRO_3.5.2.125_1033.cab" 49 | shell: cmd 50 | 51 | # Downloads the Python cabinet file for the initial release version of SQL Server 2019. 52 | # Note that we use the -L flag to allow for redirects. 
53 | - name: Download Microsoft Python Open cabinet file 54 | run: | 55 | set CurrentDir=%cd% 56 | curl -L https://go.microsoft.com/fwlink/?linkid=2085793 -o "%CurrentDir%/SPO_4.5.12.120_1033.cab" 57 | shell: cmd 58 | 59 | # /MRCACHEDIRECTORY=%CurrentDir% is where we saved the cabinet files 60 | # /SAPWD=%dbPassword% /SECURITYMODE=SQL /TCPENABLED=1 allows connecting using localhost without windows account 61 | # /FEATURES=SQLEngine,ADVANCEDANALYTICS,SQL_INST_MR,SQL_INST_MPY are the machine learning services options including R and Python 62 | # Note that MLS require the initial release installed and then upgrading to newer versions. 63 | - name: Install SQL Server 2019 64 | run: | 65 | set CurrentDir=%cd% 66 | choco install sql-server-2019 -y -params "'/MRCACHEDIRECTORY=%CurrentDir% /SAPWD=%dbPassword% /SECURITYMODE=SQL /TCPENABLED=1 /INDICATEPROGRESS /ACTION=Install /FEATURES=SQLEngine,ADVANCEDANALYTICS,SQL_INST_MR,SQL_INST_MPY /INSTANCENAME=MSSQLSERVER /IACCEPTSQLSERVERLICENSETERMS /IACCEPTROPENLICENSETERMS /IACCEPTPYTHONLICENSETERMS'" 67 | shell: cmd 68 | 69 | - name: Apply SQL Server 2019 Cumulative Update 70 | run: choco install sql-server-2019-cumulative-update -y 71 | shell: cmd 72 | 73 | - name: Install sqlcmd 74 | run: choco install sqlcmd 75 | shell: cmd 76 | 77 | # Saves the chocolately logs which will likely hold the errors needed to debug a failed installation of SQL Server 2019. 
78 | - name: Save Logs as artifact 79 | if: always() 80 | uses: actions/upload-artifact@v4 81 | with: 82 | name: Chocolatey Logs 83 | path: C:\ProgramData\chocolatey\logs\chocolatey.log 84 | retention-days: 20 85 | 86 | - name: Check Connectivity to SQL Database 87 | run: | 88 | sqlcmd -S localhost -U SA -P %dbPassword% -d Master -l 5 -Q "SELECT @@VERSION" 89 | shell: cmd 90 | 91 | - name: Enable External Scripts 92 | run: sqlcmd -S localhost -U SA -P %dbPassword% -Q "EXEC sp_configure 'external scripts enabled', 1;" 93 | 94 | - name: Reconfigure 95 | run: sqlcmd -S localhost -U SA -P %dbPassword% -Q "RECONFIGURE WITH OVERRIDE;" 96 | 97 | - name: Restart the Service 98 | run: | 99 | sqlcmd -S localhost -U SA -P %dbPassword% -Q "SHUTDOWN WITH NOWAIT" 100 | timeout /t 5 /nobreak 101 | net start "MSSQLSERVER" 102 | 103 | # Validate that MLS works for R. 104 | - name: Execute SPEES for R 105 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 106 | EXEC sp_execute_external_script @language =N'R', 107 | @script=N' 108 | OutputDataSet <- InputDataSet; 109 | ', 110 | @input_data_1 =N'SELECT 1 AS hello' 111 | WITH RESULT SETS (([hello] int not null));" 112 | 113 | # Validate that MLS works for Python. 
114 | - name: Execute SPEES for Python 115 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 116 | EXEC sp_execute_external_script @language =N'Python', 117 | @script=N'OutputDataSet = InputDataSet;', 118 | @input_data_1 =N'SELECT 1 AS hello' WITH RESULT SETS (([hello] int not null));" 119 | 120 | - name: Checkout Branch 121 | uses: actions/checkout@v4 122 | 123 | - name: Move AirlineTestDB.bak to correct file location for backup command 124 | run: copy "AirlineTestDB.bak" "C:\Program Files\Microsoft SQL Server\MSSQL15.MSSQLSERVER\MSSQL\Backup\AirlineTestDB.bak" 125 | 126 | - name: Restore AirlineTestDB 127 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q "USE [master]; RESTORE DATABASE [AirlineTestDB] FROM DISK = N'C:\Program Files\Microsoft SQL Server\MSSQL15.MSSQLSERVER\MSSQL\Backup\AirlineTestDB.bak' WITH FILE = 1, MOVE N'AirlineTestDB' TO N'C:\Program Files\Microsoft SQL Server\MSSQL15.MSSQLSERVER\MSSQL\DATA\AirlineTestDB_Primary.mdf', MOVE N'AirlineTestDB_log' TO N'C:\Program Files\Microsoft SQL Server\MSSQL15.MSSQLSERVER\MSSQL\DATA\AirlineTestDB_Primary.ldf', NOUNLOAD, STATS = 5;" 128 | 129 | - name: Create Logins for Test DBs 130 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 131 | CREATE LOGIN AirlineUser WITH PASSWORD = '%dbPassword%', CHECK_POLICY=OFF, CHECK_EXPIRATION = OFF, DEFAULT_DATABASE=AirlineTestDB; 132 | CREATE LOGIN AirlineUserdbowner WITH PASSWORD = '%dbPassword%', CHECK_POLICY=OFF, CHECK_EXPIRATION = OFF, DEFAULT_DATABASE=AirlineTestDB;" 133 | 134 | - name: Alter AirlineUser 135 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 136 | USE AirlineTestDB; 137 | ALTER USER [AirlineUser] WITH LOGIN=[AirlineUser]" 138 | 139 | - name: Alter AirlineUserdbowner 140 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 141 | USE AirlineTestDB; 142 | ALTER USER [AirlineUserdbowner] WITH LOGIN=[AirlineUserdbowner]" 143 | 144 | - name: Alter Authorization 145 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 
-Q " 146 | USE AirlineTestDB; 147 | ALTER AUTHORIZATION ON SCHEMA::[db_owner] TO [AirlineUserdbowner]" 148 | 149 | - name: Set up R ${{ env.r-version }} Runtime 150 | uses: r-lib/actions/setup-r@v2 151 | with: 152 | r-version: ${{ env.r-version }} 153 | 154 | # Todo: Needs mirror installation URL for python 3.5 155 | - name: Install R Package Dependencies 156 | uses: r-lib/actions/setup-r-dependencies@v2 157 | with: 158 | cache-version: 2 159 | working-directory: ./R 160 | extra-packages: 161 | #Retrieves most recent odbc pkg from cran to avoid errors seen in older versions. 162 | #Current version needs R >=3.6 and is currently failing 163 | cran::odbc 164 | cran::xml2 165 | rcmdcheck 166 | continue-on-error: true 167 | 168 | - uses: r-lib/actions/check-r-package@v2 169 | with: 170 | working-directory: ./R 171 | env: 172 | PASSWORD_AIRLINE_USER: "${{ env.dbPassword }}" 173 | PASSWORD_AIRLINE_USER_DBOWNER: "${{ env.dbPassword }}" 174 | continue-on-error: true 175 | 176 | 177 | - name: Set up Python ${{ env.python-version }} 178 | uses: actions/setup-python@v2 179 | with: 180 | python-version: ${{ env.python-version }} 181 | 182 | - name: Install Python dependencies 183 | working-directory: ./Python 184 | run: | 185 | python -m pip install --upgrade pip 186 | python -m pip install flake8 pytest 187 | pip install -r requirements.txt 188 | 189 | - name: Build Python Package 190 | working-directory: ./Python 191 | run: ./buildandinstall.cmd 192 | 193 | - name: Run pytest 194 | working-directory: ./Python/tests 195 | run: | 196 | pytest 197 | continue-on-error: true 198 | -------------------------------------------------------------------------------- /R/R/storedProcedureScripting.R: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | 5 | # the list with type conversion info 6 | sqlTypes <- list(posixct = "datetime", numeric = "float", 7 | character = "nvarchar(max)", integer = "int", 8 | logical = "bit", raw = "varbinary(max)", dataframe = "nvarchar(max)") 9 | 10 | getSqlType <- function(rType) 11 | { 12 | sqlTypes[[tolower(rType)]] 13 | } 14 | 15 | # creates the top part of the sql script (up to R code) 16 | # 17 | getHeader <- function(spName, inputParams, outputParams, languageName) 18 | { 19 | header <- c(paste0("CREATE PROCEDURE ", spName), 20 | handleHeadParams(inputParams, outputParams), 21 | "AS", 22 | "BEGIN TRY", 23 | "exec sp_execute_external_script", 24 | paste0("@language = N'", languageName,"',"),"@script = N'") 25 | 26 | return(paste0(header, collapse = "\n")) 27 | } 28 | 29 | handleHeadParams <- function(inputParams, outputParams) 30 | { 31 | paramString <- c() 32 | 33 | makeString <- function(name, d, output = "") 34 | { 35 | rType <- d[[name]] 36 | sqlType <- getSqlType(rType) 37 | paste0(" @", name, "_outer ", sqlType, output) 38 | } 39 | 40 | for(name in names(inputParams)) 41 | { 42 | paramString <- c(paramString, makeString(name, inputParams)) 43 | } 44 | 45 | for(name in names(outputParams)) 46 | { 47 | rType <- outputParams[[name]] 48 | if (tolower(rType) != "dataframe") 49 | { 50 | paramString <- c(paramString, makeString(name, outputParams, " output")) 51 | } 52 | } 53 | 54 | return(paste0(paramString, collapse = ",\n")) 55 | } 56 | 57 | generateTSQL <- function(func, spName, inputParams, outputParams, languageName) 58 | { 59 | # header to drop and create a stored procedure 60 | # 61 | header <- getHeader(spName, inputParams = inputParams, outputParams = outputParams, languageName = languageName) 62 | 63 | # vector containing R code 64 | # 65 | rCode <- getRCode(func, outputParams) 66 | 67 | # tail of the sp 68 | # 69 | tail <- getTail(inputParams, outputParams) 70 | 71 | return(paste0(header, rCode, tail, sep = "\n")) 72 | } 73 | 74 | generateTSQLFromScript <- 
function(script, spName, inputParams, outputParams, languageName) 75 | { 76 | # header to drop and create a stored procedure 77 | # 78 | header <- getHeader(spName, inputParams = inputParams, outputParams = outputParams, languageName = languageName) 79 | 80 | # vector containing R code 81 | # 82 | rCode <- getRCodeFromScript(script = script, outputParams = outputParams) 83 | 84 | # tail of the sp 85 | # 86 | tail <- getTail(inputParams = inputParams, outputParams = outputParams) 87 | 88 | return(paste0(header, rCode, tail, sep = "\n")) 89 | } 90 | 91 | 92 | 93 | # creates the bottom part of the sql script (after R code) 94 | # 95 | getTail <- function(inputParams, outputParams) 96 | { 97 | tail <- c("'") 98 | tailParams <- handleTailParams(inputParams, outputParams) 99 | 100 | if (tailParams != "") 101 | tail <- c("',") 102 | 103 | tail <- c(tail, 104 | tailParams, 105 | "END TRY", 106 | "BEGIN CATCH", 107 | "THROW;", 108 | "END CATCH;") 109 | 110 | return(paste0(tail, collapse = "\n")) 111 | } 112 | 113 | handleTailParams <- function(inputParams, outputParams) 114 | { 115 | inDataString <- c() 116 | outDataString <- c() 117 | paramString <- c() 118 | overallParams <- c() 119 | 120 | makeString <- function(name, d, output = "") 121 | { 122 | rType <- d[[name]] 123 | if (tolower(rType) == "dataframe") 124 | { 125 | if (output=="") 126 | { 127 | c(paste0("@input_data_1 = @", name, "_outer"), 128 | paste0("@input_data_1_name = N'", name, "'")) 129 | } 130 | else 131 | { 132 | c(paste0("@output_data_1_name = N'", name, "'")) 133 | } 134 | } 135 | else 136 | { 137 | sqlType <- getSqlType(rType) 138 | overallParams <<- c(overallParams, paste0("@", name, " ", sqlType, output)) 139 | paste0("@", name, " = ", "@", name, "_outer", output) 140 | } 141 | } 142 | 143 | for(name in names(inputParams)) 144 | { 145 | rType <- inputParams[[name]] 146 | if (tolower(rType) == "dataframe") 147 | { 148 | inDataString <- c(makeString(name, inputParams)) 149 | } 150 | else 151 | { 152 | 
paramString <- c(paramString, makeString(name, inputParams)) 153 | } 154 | } 155 | for(name in names(outputParams)) 156 | { 157 | rType <- outputParams[[name]] 158 | if (tolower(rType) == "dataframe") 159 | { 160 | outDataString <- c(makeString(name, outputParams, " output")) 161 | } 162 | else 163 | { 164 | paramString <- c(paramString, makeString(name, outputParams, " output")) 165 | } 166 | } 167 | if (length(overallParams) > 0) 168 | { 169 | overallParams <- paste0(overallParams, collapse = ", ") 170 | overallParams <- paste0("@params = N'" , overallParams,"'") 171 | } 172 | 173 | return(paste0(c(inDataString, outDataString, overallParams, paramString), collapse = ",\n")) 174 | } 175 | 176 | getRCodeFromScript <- function(script, inputParams, outputParams) 177 | { 178 | # escape single quotes and get rid of tabs 179 | # 180 | script <- sapply(script, gsub, pattern = "\t", replacement = " ") 181 | script <- sapply(script, gsub, pattern = "'", replacement = "''") 182 | 183 | return(paste0(script, collapse = "\n")) 184 | } 185 | 186 | getRCode <- function(func, outputParams) 187 | { 188 | name <- as.character(substitute(func)) 189 | 190 | funcBody <- deparse(func) 191 | 192 | # add on the function definition 193 | # 194 | funcBody[1] <- paste(name, "<-", funcBody[1], sep = " ") 195 | 196 | # escape single quotes and get rid of tabs 197 | # 198 | funcBody <- sapply(funcBody, gsub, pattern = "\t", replacement = " ") 199 | funcBody <- sapply(funcBody, gsub, pattern = "'", replacement = "''") 200 | 201 | inputParameters <- methods::formalArgs(func) 202 | 203 | funcInputNames <- paste(inputParameters, inputParameters, 204 | sep = " = ") 205 | funcInputNames <- paste(funcInputNames, collapse = ", ") 206 | 207 | # add function call 208 | # 209 | funcBody <- c(funcBody, paste0("result <- ", name, 210 | paste0("(", funcInputNames, ")"))) 211 | 212 | # add appropriate ending 213 | # 214 | ending <- getEnding(outputParams) 215 | funcBody <- c(funcBody, ending) 216 | 217 |
return(paste0(funcBody, collapse = "\n")) 218 | } 219 | 220 | # 221 | # Get ending string 222 | # We change the result into an OutputDataSet - we only expect a single OutputDataSet result 223 | # 224 | getEnding <- function(outputParams) 225 | { 226 | outputDataSetName <- "OutputDataSet" 227 | for(name in names(outputParams)) 228 | { 229 | if (tolower(outputParams[[name]]) == "dataframe") 230 | { 231 | outputDataSetName <- name 232 | } 233 | } 234 | 235 | ending <- c( "if (is.data.frame(result)) {", 236 | paste0(" ", outputDataSetName," <- result") 237 | ) 238 | 239 | if (length(outputParams) > 0) 240 | { 241 | ending <- c(ending, "} else if (is.list(result)) {") 242 | 243 | for(name in names(outputParams)) 244 | { 245 | if (tolower(outputParams[[name]]) == "dataframe") 246 | { 247 | ending <- c(ending,paste0(" ", name," <- result$", name)) 248 | } 249 | else 250 | { 251 | ending <- c(ending,paste0(" ", name, " <- result$", name)) 252 | } 253 | } 254 | 255 | ending <- c(ending, 256 | "} else if (!is.null(result)) {", 257 | " stop(\"the R function must return a list\")" 258 | ) 259 | } 260 | 261 | ending <- c(ending, "}") 262 | } 263 | 264 | # @import odbc 265 | # Execute the registration script 266 | register <- function(registrationScript, connectionString) 267 | { 268 | output <- character(0) 269 | 270 | execute(connectionString, registrationScript) 271 | 272 | if (length(output) > 0 ) 273 | { 274 | stop(output) 275 | } 276 | 277 | output 278 | } 279 | -------------------------------------------------------------------------------- /Python/tests/package_management_file_test.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | import io 5 | import os 6 | import sys 7 | import subprocess 8 | import tempfile 9 | from contextlib import redirect_stdout 10 | 11 | import pytest 12 | 13 | from sqlmlutils import ConnectionInfo, SQLPackageManager, SQLPythonExecutor, Scope 14 | from package_helper_functions import _get_sql_package_table, _get_package_names_list 15 | from sqlmlutils.packagemanagement.pipdownloader import PipDownloader 16 | 17 | from conftest import connection, airline_user_connection 18 | 19 | path_to_packages = os.path.join((os.path.dirname(os.path.realpath(__file__))), "scripts", "test_packages") 20 | _SUCCESS_TOKEN = "SUCCESS" 21 | 22 | pyexecutor = SQLPythonExecutor(connection) 23 | pkgmanager = SQLPackageManager(connection) 24 | 25 | originals = _get_sql_package_table(connection) 26 | 27 | def check_package(package_name: str, exists: bool, class_to_check: str = ""): 28 | """Check and assert whether a package exists, and if a class is in the module""" 29 | if exists: 30 | themodule = __import__(package_name) 31 | assert themodule is not None 32 | assert getattr(themodule, class_to_check) is not None 33 | else: 34 | import pytest 35 | with pytest.raises(Exception): 36 | __import__(package_name) 37 | 38 | def _drop(package_name: str, ddl_name: str): 39 | """Uninstall a package and check that it is gone""" 40 | pkgmanager.uninstall(package_name) 41 | pyexecutor.execute_function_in_sql(check_package, package_name=package_name, exists=False) 42 | 43 | def _create(module_name: str, package_file: str, class_to_check: str, drop: bool = True): 44 | """Install a package and check that it is installed""" 45 | try: 46 | pyexecutor.execute_function_in_sql(check_package, package_name=module_name, exists=False) 47 | pkgmanager.install(package_file) 48 | pyexecutor.execute_function_in_sql(check_package, package_name=module_name, exists=True, class_to_check=class_to_check) 49 | finally: 50 | if drop: 51 | _drop(package_name=module_name, ddl_name=module_name) 52 | 53 | def 
_remove_all_new_packages(manager): 54 | """Drop all packages that were not there in the original list""" 55 | df = _get_sql_package_table(connection) 56 | 57 | libs = {df['external_library_id'][i]: (df['name'][i], df['scope'][i]) for i in range(len(df.index))} 58 | original_libs = {originals['external_library_id'][i]: (originals['name'][i], originals['scope'][i]) for i in range(len(originals.index))} 59 | 60 | for lib in libs: 61 | pkg, sc = libs[lib] 62 | if lib not in original_libs: 63 | print("uninstalling" + str(lib)) 64 | if sc: 65 | manager.uninstall(pkg, scope=Scope.private_scope()) 66 | else: 67 | manager.uninstall(pkg, scope=Scope.public_scope()) 68 | else: 69 | if sc != original_libs[lib][1]: 70 | if sc: 71 | manager.uninstall(pkg, scope=Scope.private_scope()) 72 | else: 73 | manager.uninstall(pkg, scope=Scope.public_scope()) 74 | 75 | 76 | # Download the package zips we will use for these tests 77 | # 78 | packages = ["astor==0.8.1", "html5lib==1.1", "termcolor==1.1.0"] 79 | 80 | for package in packages: 81 | pipdownloader = PipDownloader(connection, path_to_packages, package, language_name="Python") 82 | pipdownloader.download_single() 83 | 84 | def test_install_basic_zip_package(): 85 | """Test a basic zip package""" 86 | package = os.path.join(path_to_packages, "testpackageA-0.0.1.zip") 87 | module_name = "testpackageA" 88 | 89 | _remove_all_new_packages(pkgmanager) 90 | 91 | _create(module_name=module_name, package_file=package, class_to_check="ClassA") 92 | 93 | @pytest.mark.skip(reason="Very long running test. 
Skip for CI.") 94 | def test_install_whl_files(): 95 | """Test some basic wheel files""" 96 | packages = ["html5lib-1.1-py2.py3-none-any.whl", 97 | "astor-0.8.1-py2.py3-none-any.whl"] 98 | module_names = ["html5lib", "astor"] 99 | classes_to_check = ["parse", "code_gen"] 100 | 101 | _remove_all_new_packages(pkgmanager) 102 | 103 | for package, module, class_to_check in zip(packages, module_names, classes_to_check): 104 | full_package = os.path.join(path_to_packages, package) 105 | _create(module_name=module, package_file=full_package, class_to_check=class_to_check) 106 | 107 | 108 | def test_install_targz_files(): 109 | """Test a basic tar.gz file""" 110 | packages = ["termcolor-1.1.0.tar.gz"] 111 | module_names = ["termcolor"] 112 | ddl_names = ["termcolor"] 113 | classes_to_check = ["colored"] 114 | 115 | _remove_all_new_packages(pkgmanager) 116 | 117 | for package, module, ddl_name, class_to_check in zip(packages, module_names, ddl_names, classes_to_check): 118 | full_package = os.path.join(path_to_packages, package) 119 | _create(module_name=module, package_file=full_package, class_to_check=class_to_check) 120 | 121 | @pytest.mark.skip(reason="Very long running test. 
Skip for CI.") 122 | def test_install_bad_package_badzipfile(): 123 | """Test a zip that is not a package, then make sure it is not in the external_libraries table""" 124 | _remove_all_new_packages(pkgmanager) 125 | 126 | with tempfile.TemporaryDirectory() as temporary_directory: 127 | badpackagefile = os.path.join(temporary_directory, "badpackageA-0.0.1.zip") 128 | with open(badpackagefile, "w") as f: 129 | f.write("asdasdasdascsacsadsadas") 130 | with pytest.raises(Exception): 131 | pkgmanager.install(badpackagefile) 132 | 133 | assert "badpackageA" not in _get_package_names_list(connection) 134 | 135 | def test_package_already_exists_on_sql_table(): 136 | """Test the 'upgrade' parameter in installation""" 137 | _remove_all_new_packages(pkgmanager) 138 | 139 | # Install a downgraded version of the package first 140 | # 141 | package = os.path.join(path_to_packages, "testpackageA-0.0.1.zip") 142 | pkgmanager.install(package) 143 | 144 | def check_version(): 145 | import pkg_resources 146 | return pkg_resources.get_distribution("testpackageA").version 147 | 148 | version = pyexecutor.execute_function_in_sql(check_version) 149 | assert version == "0.0.1" 150 | 151 | package = os.path.join(path_to_packages, "testpackageA-0.0.2.zip") 152 | 153 | # Without upgrade 154 | # 155 | output = io.StringIO() 156 | with redirect_stdout(output): 157 | pkgmanager.install(package, upgrade=False) 158 | assert "exists on server. 
Set upgrade to True" in output.getvalue() 159 | 160 | version = pyexecutor.execute_function_in_sql(check_version) 161 | assert version == "0.0.1" 162 | 163 | # With upgrade 164 | # 165 | pkgmanager.install(package, upgrade=True) 166 | 167 | version = pyexecutor.execute_function_in_sql(check_version) 168 | assert version == "0.0.2" 169 | 170 | pkgmanager.uninstall("testpackageA") 171 | 172 | def test_scope(): 173 | """Test installing in a private scope with a db_owner (not dbo) user""" 174 | _remove_all_new_packages(pkgmanager) 175 | 176 | package = os.path.join(path_to_packages, "testpackageA-0.0.1.zip") 177 | 178 | def get_location(): 179 | import testpackageA 180 | return testpackageA.__file__ 181 | 182 | # The airline_user_connection is database user "airlineuser" and is NOT dbo, 183 | # so it has access to both Private and Public scopes 184 | # 185 | revopkgmanager = SQLPackageManager(airline_user_connection) 186 | revoexecutor = SQLPythonExecutor(airline_user_connection) 187 | 188 | # Install a package into the private scope using database user "airlineuser" 189 | # 190 | revopkgmanager.install(package, scope=Scope.private_scope()) 191 | private_location = revoexecutor.execute_function_in_sql(get_location) 192 | 193 | pkg_name = "testpackageA" 194 | 195 | pyexecutor.execute_function_in_sql(check_package, package_name=pkg_name, exists=False) 196 | 197 | # Uninstall packages installed into database user "airlineuser" private directory. 
198 | # 199 | revopkgmanager.uninstall(pkg_name, scope=Scope.private_scope()) 200 | 201 | # Try the same installation in public scope 202 | # 203 | revopkgmanager.install(package, scope=Scope.public_scope()) 204 | public_location = revoexecutor.execute_function_in_sql(get_location) 205 | 206 | assert private_location != public_location 207 | pyexecutor.execute_function_in_sql(check_package, package_name=pkg_name, exists=True, class_to_check='ClassA') 208 | 209 | revopkgmanager.uninstall(pkg_name, scope=Scope.public_scope()) 210 | 211 | # Make sure the package was removed properly from private scope 212 | # 213 | revoexecutor.execute_function_in_sql(check_package, package_name=pkg_name, exists=False) 214 | 215 | # Make sure the package was removed properly from public scope 216 | # 217 | pyexecutor.execute_function_in_sql(check_package, package_name=pkg_name, exists=False) 218 | -------------------------------------------------------------------------------- /.github/workflows/SQL2022.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | # 6 | # See https://github.com/r-lib/actions/tree/master/examples#readme for 7 | # additional example workflows available for the R community. 8 | 9 | name: R 4.2 - Python 3.10 10 | 11 | on: 12 | push: 13 | branches: [ master ] 14 | pull_request: 15 | branches: [ master ] 16 | workflow_dispatch: 17 | 18 | jobs: 19 | SQLServer2022: 20 | runs-on: windows-2022 21 | 22 | env: 23 | # Define CI to skip some test case. 24 | CI: True 25 | r-version: "4.2.0" 26 | python-version: "3.10.2" 27 | sql-platform: "box" 28 | 29 | 30 | defaults: 31 | run: 32 | shell: cmd 33 | 34 | steps: 35 | # Set password in Github env but don't display output. 
36 | # Note: in a GitHub Actions workflow you need to use a double % (%%i); when testing in a local VM 37 | # %i would be the correct format. 38 | - name: Generate password 39 | run: | 40 | setlocal enabledelayedexpansion 41 | set password=%random%%random%%random%qaZ~@ 42 | for /f "delims=" %%i in ('echo !password!') do echo dbPassword=%%i >> "%GITHUB_ENV%" 43 | 44 | # /SAPWD=%dbPassword% /SECURITYMODE=SQL /TCPENABLED=1 allows connecting using localhost without windows account. 45 | # /FEATURES=SQLEngine,ADVANCEDANALYTICS are the machine learning services options. 46 | - name: Install SQL Server 2022 47 | run: | 48 | set CurrentDir=%cd% 49 | choco install sql-server-2022 -y -params "'/SAPWD=%dbPassword% /SECURITYMODE=SQL /TCPENABLED=1 /INDICATEPROGRESS /ACTION=Install /FEATURES=SQLEngine,ADVANCEDANALYTICS /INSTANCENAME=MSSQLSERVER /IACCEPTSQLSERVERLICENSETERMS'" 50 | shell: cmd 51 | 52 | - name: Install sqlcmd 53 | run: choco install sqlcmd 54 | shell: cmd 55 | 56 | # Saves the Chocolatey logs, which will likely hold the errors needed to debug a failed installation of SQL Server 2022.
57 | - name: Save Logs as artifact 58 | if: always() 59 | uses: actions/upload-artifact@v4 60 | with: 61 | name: Chocolatey Logs 62 | path: C:\ProgramData\chocolatey\logs\chocolatey2022.log 63 | retention-days: 20 64 | 65 | - name: Check Connectivity to SQL Database 66 | run: | 67 | sqlcmd -S localhost -U SA -P %dbPassword% -d Master -l 5 -Q "SELECT @@VERSION" 68 | shell: cmd 69 | 70 | - name: Checkout Branch 71 | uses: actions/checkout@v4 72 | 73 | - name: Move AirlineTestDB.bak to correct file location for backup command 74 | run: copy "AirlineTestDB.bak" "C:\Program Files\Microsoft SQL Server\MSSQL16.MSSQLSERVER\MSSQL\Backup\AirlineTestDB.bak" 75 | 76 | - name: Restore AirlineTestDB 77 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q "USE [master]; RESTORE DATABASE [AirlineTestDB] FROM DISK = N'C:\Program Files\Microsoft SQL Server\MSSQL16.MSSQLSERVER\MSSQL\Backup\AirlineTestDB.bak' WITH FILE = 1, MOVE N'AirlineTestDB' TO N'C:\Program Files\Microsoft SQL Server\MSSQL16.MSSQLSERVER\MSSQL\DATA\AirlineTestDB_Primary.mdf', MOVE N'AirlineTestDB_log' TO N'C:\Program Files\Microsoft SQL Server\MSSQL16.MSSQLSERVER\MSSQL\DATA\AirlineTestDB_Primary.ldf', NOUNLOAD, STATS = 5;" 78 | 79 | - name: Create Logins for Test DBs 80 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 81 | CREATE LOGIN AirlineUser WITH PASSWORD = '%dbPassword%', CHECK_POLICY=OFF, CHECK_EXPIRATION = OFF, DEFAULT_DATABASE=AirlineTestDB; 82 | CREATE LOGIN AirlineUserdbowner WITH PASSWORD = '%dbPassword%', CHECK_POLICY=OFF, CHECK_EXPIRATION = OFF, DEFAULT_DATABASE=AirlineTestDB;" 83 | 84 | - name: Alter AirlineUser 85 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 86 | USE AirlineTestDB; 87 | ALTER USER [AirlineUser] WITH LOGIN=[AirlineUser]" 88 | 89 | - name: Alter AirlineUserdbowner 90 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 91 | USE AirlineTestDB; 92 | ALTER USER [AirlineUserdbowner] WITH LOGIN=[AirlineUserdbowner]" 93 | 94 | - name: Alter Authorization 
95 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 96 | USE AirlineTestDB; 97 | ALTER AUTHORIZATION ON SCHEMA::[db_owner] TO [AirlineUserdbowner]" 98 | 99 | # https://learn.microsoft.com/sql/machine-learning/install/sql-machine-learning-services-windows-install-sql-2022?view=sql-server-ver16#setup-r-support 100 | - name: Install R for MLS 101 | run: | 102 | curl -L -o R-4.2.0-win.exe https://cloud.r-project.org/bin/windows/base/old/4.2.0/R-4.2.0-win.exe 103 | R-4.2.0-win.exe /VERYSILENT /DIR=C:\MLS\R 104 | 105 | - name: Install CompatibilityAPI and RevoScaleR after installing their dependencies (iterators, foreach, R6, and jsonlite). 106 | run: | 107 | C:\MLS\R\bin\Rscript.exe -e "install.packages(c('iterators', 'foreach', 'R6', 'jsonlite'), repos='https://cloud.r-project.org/', lib='C:\\MLS\\R\\library')" 108 | C:\MLS\R\bin\Rscript.exe -e "install.packages('https://aka.ms/sqlml/r4.2/windows/CompatibilityAPI_1.1.0.zip', repos=NULL, lib='C:\\MLS\\R\\library')" 109 | C:\MLS\R\bin\Rscript.exe -e "install.packages('https://aka.ms/sqlml/r4.2/windows/RevoScaleR_10.0.1.zip', repos=NULL, lib='C:\\MLS\\R\\library')" 110 | 111 | - name: Configure the R runtime installed for MLS with SQL Server 112 | run: C:\MLS\R\library\RevoScaleR\rxLibs\x64\RegisterRext.exe /configure /rhome:"C:\MLS\R" /instance:"MSSQLSERVER" 113 | 114 | - name: Set up Python ${{ env.python-version }} 115 | uses: actions/setup-python@v5 116 | with: 117 | python-version: ${{ env.python-version }} 118 | 119 | - name: Install revoscalepy and dependencies 120 | working-directory: ${{ env.Python3_ROOT_DIR }} 121 | run: | 122 | python -m pip install -t "${{ env.Python3_ROOT_DIR }}\Lib\site-packages" dill numpy==1.22.0 pandas patsy python-dateutil 123 | python -m pip install -t "${{ env.Python3_ROOT_DIR }}\Lib\site-packages" https://aka.ms/sqlml/python3.10/windows/revoscalepy-10.0.1-py3-none-any.whl 124 | 125 | - name: Grant READ/EXECUTE access to installed libraries 126 | run: | 127 | icacls "${{ 
env.Python3_ROOT_DIR }}\Lib\site-packages" /grant "NT Service\MSSQLLAUNCHPAD":(OI)(CI)RX /T 128 | icacls "${{ env.Python3_ROOT_DIR }}\Lib\site-packages" /grant *S-1-15-2-1:(OI)(CI)RX /T 129 | 130 | - name: Configure the Python runtime installed for MLS with SQL Server 131 | working-directory: ${{ env.Python3_ROOT_DIR }}\Lib\site-packages\revoscalepy\rxLibs 132 | run: .\RegisterRext.exe /configure /pythonhome:"${{ env.Python3_ROOT_DIR }}" /instance:"MSSQLSERVER" 133 | 134 | - name: Enable External Scripts 135 | run: sqlcmd -S localhost -U SA -P %dbPassword% -Q "EXEC sp_configure 'external scripts enabled', 1;" 136 | 137 | - name: Reconfigure 138 | run: sqlcmd -S localhost -U SA -P %dbPassword% -Q "RECONFIGURE WITH OVERRIDE;" 139 | 140 | - name: Restart the Service 141 | run: | 142 | sqlcmd -S localhost -U SA -P %dbPassword% -Q "SHUTDOWN WITH NOWAIT" 143 | timeout /t 5 /nobreak 144 | net start "MSSQLSERVER" 145 | 146 | - name: Execute sp_execute_external_script for R 147 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 148 | EXEC sp_execute_external_script @language =N'R', 149 | @script=N' 150 | OutputDataSet <- InputDataSet; 151 | ', 152 | @input_data_1 =N'SELECT 1 AS hello' 153 | WITH RESULT SETS (([hello] int not null));" 154 | 155 | - name: Execute sp_execute_external_script for Python 156 | run: sqlcmd -S localhost -U SA -P %dbPassword% -l 5 -Q " 157 | EXEC sp_execute_external_script @language =N'Python', 158 | @script=N'OutputDataSet = InputDataSet;', 159 | @input_data_1 =N'SELECT 1 AS hello' WITH RESULT SETS (([hello] int not null));" 160 | 161 | - name: Set up R ${{ env.r-version }} Runtime 162 | uses: r-lib/actions/setup-r@v2 163 | with: 164 | r-version: ${{ env.r-version }} 165 | 166 | - name: Install R Package Dependencies 167 | uses: r-lib/actions/setup-r-dependencies@v2 168 | with: 169 | cache-version: 2 170 | working-directory: ./R 171 | extra-packages: 172 | #Retrieves most recent odbc pkg from cran to avoid errors seen in older versions. 
173 | #Current version needs R >=3.6 and is currently failing 174 | cran::odbc 175 | cran::xml2 176 | rcmdcheck 177 | 178 | - uses: r-lib/actions/check-r-package@v2 179 | with: 180 | working-directory: ./R 181 | env: 182 | PASSWORD_AIRLINE_USER: "${{ env.dbPassword }}" 183 | PASSWORD_AIRLINE_USER_DBOWNER: "${{ env.dbPassword }}" 184 | 185 | - name: Install Python dependencies 186 | working-directory: ./Python 187 | run: | 188 | python -m pip install --upgrade pip 189 | python -m pip install flake8 pytest 190 | pip install -r requirements.txt 191 | 192 | - name: Build Python Package 193 | working-directory: ./Python 194 | run: ./buildandinstall.cmd 195 | 196 | - name: Run pytest 197 | working-directory: ./Python/tests 198 | run: | 199 | pytest 200 | env: 201 | USER: "AirlineUserdbowner" 202 | PASSWORD: "${{ env.dbPassword }}" 203 | PASSWORD_AIRLINE_USER: "${{ env.dbPassword }}" 204 | continue-on-error: true 205 | -------------------------------------------------------------------------------- /Python/README.md: -------------------------------------------------------------------------------- 1 | # sqlmlutils 2 | 3 | sqlmlutils is a python package to help execute Python code on a SQL Server machine. It is built to work with ML Services for SQL Server. 4 | 5 | # Installation 6 | 7 | To install from PyPI, run: 8 | ``` 9 | pip install sqlmlutils 10 | ``` 11 | To install from file, download the latest release from https://github.com/microsoft/sqlmlutils/releases: 12 | ``` 13 | pip install sqlmlutils-1.1.0.zip 14 | ``` 15 | 16 | If you are developing on your own branch and want to rebuild and install the package, you can use the buildandinstall.cmd script that is included. 
17 | 18 | # Getting started 19 | 20 | Shown below are the important functions sqlmlutils provides: 21 | ```python 22 | SQLPythonExecutor functions: 23 | execute_function_in_sql # Execute a python function inside the SQL database 24 | execute_script_in_sql # Execute a python script inside the SQL database 25 | execute_sql_query # Execute a sql query in the database and return the resultant table 26 | 27 | create_sproc_from_function # Create a stored procedure based on a Python function inside the SQL database 28 | create_sproc_from_script # Create a stored procedure based on a Python script inside the SQL database 29 | check_sproc # Check whether a stored procedure exists in the SQL database 30 | drop_sproc # Drop a stored procedure from the SQL database 31 | execute_sproc # Execute a stored procedure in the SQL database 32 | 33 | SQLPackageManager functions: 34 | install # Install a Python package on the SQL database 35 | uninstall # Remove a Python package from the SQL database 36 | list # Enumerate packages that are installed on the SQL database 37 | _get_packages_by_user # Enumerate external libraries installed by specific user in specific scope 38 | ``` 39 | 40 | # Examples 41 | 42 | ### Execute in SQL 43 | ##### Execute a python function in database 44 | 45 | ```python 46 | import sqlmlutils 47 | 48 | def foo(): 49 | return "bar" 50 | 51 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
52 | # connection = sqlmlutils.ConnectionInfo(driver="ODBC Driver 13 for SQL Server", server="localhost", database="master", uid="username", pwd="password") 53 | 54 | connection = sqlmlutils.ConnectionInfo(server="localhost", database="master") 55 | 56 | sqlpy = sqlmlutils.SQLPythonExecutor(connection) 57 | result = sqlpy.execute_function_in_sql(foo) 58 | assert result == "bar" 59 | ``` 60 | 61 | ##### Generate a scatter plot without the data leaving the machine 62 | 63 | ```python 64 | import sqlmlutils 65 | from PIL import Image 66 | 67 | 68 | def scatter_plot(input_df, x_col, y_col): 69 | import matplotlib.pyplot as plt 70 | import io 71 | 72 | title = x_col + " vs. " + y_col 73 | 74 | plt.scatter(input_df[x_col], input_df[y_col]) 75 | plt.xlabel(x_col) 76 | plt.ylabel(y_col) 77 | plt.title(title) 78 | 79 | # Save scatter plot image as a png 80 | buf = io.BytesIO() 81 | plt.savefig(buf, format="png") 82 | buf.seek(0) 83 | 84 | # Returns the bytes of the png to the client 85 | return buf 86 | 87 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 88 | # connection = sqlmlutils.ConnectionInfo(driver="ODBC Driver 13 for SQL Server", server="localhost", database="AirlineTestDB", uid="username", pwd="password") 89 | 90 | connection = sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB") 91 | 92 | sqlpy = sqlmlutils.SQLPythonExecutor(connection) 93 | 94 | sql_query = "select top 100 * from airline5000" 95 | plot_data = sqlpy.execute_function_in_sql(func=scatter_plot, input_data_query=sql_query, 96 | x_col="ArrDelay", y_col="CRSDepTime") 97 | im = Image.open(plot_data) 98 | im.show() 99 | ``` 100 | 101 | ##### Perform linear regression on data stored in SQL Server without the data leaving the machine 102 | 103 | You can use the AirlineTestDB (supplied as a .bak file above) to run these examples. 
104 | 105 | ```python 106 | import sqlmlutils 107 | 108 | def linear_regression(input_df, x_col, y_col): 109 | from sklearn import linear_model 110 | 111 | X = input_df[[x_col]] 112 | y = input_df[y_col] 113 | 114 | lr = linear_model.LinearRegression() 115 | lr.fit(X, y) 116 | 117 | return lr 118 | 119 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 120 | # connection = sqlmlutils.ConnectionInfo(driver="ODBC Driver 13 for SQL Server", server="localhost", database="AirlineTestDB", uid="username", pwd="password") 121 | 122 | connection = sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB") 123 | 124 | sqlpy = sqlmlutils.SQLPythonExecutor(connection) 125 | sql_query = "select top 1000 CRSDepTime, CRSArrTime from airline5000" 126 | regression_model = sqlpy.execute_function_in_sql(linear_regression, input_data_query=sql_query, 127 | x_col="CRSDepTime", y_col="CRSArrTime") 128 | print(regression_model) 129 | print(regression_model.coef_) 130 | ``` 131 | 132 | ##### Execute a SQL Query from Python 133 | 134 | ```python 135 | import sqlmlutils 136 | import pytest 137 | 138 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
139 | # connection = sqlmlutils.ConnectionInfo(driver="ODBC Driver 13 for SQL Server", server="localhost", database="AirlineTestDB", uid="username", pwd="password") 140 | 141 | connection = sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB") 142 | 143 | sqlpy = sqlmlutils.SQLPythonExecutor(connection) 144 | sql_query = "select top 10 * from airline5000" 145 | data_table = sqlpy.execute_sql_query(sql_query) 146 | assert len(data_table.columns) == 30 147 | assert len(data_table) == 10 148 | ``` 149 | 150 | ### Stored Procedure 151 | ##### Create and call a T-SQL stored procedure based on a Python function 152 | 153 | ```python 154 | import sqlmlutils 155 | import pytest 156 | 157 | def principal_components(input_table: str, output_table: str): 158 | import sqlalchemy 159 | from urllib import parse 160 | import pandas as pd 161 | from sklearn.decomposition import PCA 162 | 163 | # Internal ODBC connection string used by process executing inside SQL Server 164 | connection_string = "Driver=SQL Server;Server=localhost;Database=AirlineTestDB;Trusted_Connection=Yes;" 165 | engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect={}".format(parse.quote_plus(connection_string))) 166 | 167 | input_df = pd.read_sql("select top 200 ArrDelay, CRSDepTime from {}".format(input_table), engine).dropna() 168 | 169 | 170 | pca = PCA(n_components=2) 171 | components = pca.fit_transform(input_df) 172 | 173 | output_df = pd.DataFrame(components) 174 | output_df.to_sql(output_table, engine, if_exists="replace") 175 | 176 | 177 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
178 | # connection = sqlmlutils.ConnectionInfo(driver="ODBC Driver 13 for SQL Server", server="localhost", database="AirlineTestDB", uid="username", pwd="password") 179 | 180 | connection = sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB") 181 | 182 | input_table = "airline5000" 183 | output_table = "AirlineDemoPrincipalComponents" 184 | 185 | sp_name = "SavePrincipalComponents" 186 | 187 | sqlpy = sqlmlutils.SQLPythonExecutor(connection) 188 | 189 | if sqlpy.check_sproc(sp_name): 190 | sqlpy.drop_sproc(sp_name) 191 | 192 | sqlpy.create_sproc_from_function(sp_name, principal_components) 193 | 194 | # You can check the stored procedure exists in the db with this: 195 | assert sqlpy.check_sproc(sp_name) 196 | 197 | sqlpy.execute_sproc(sp_name, input_table=input_table, output_table=output_table) 198 | 199 | sqlpy.drop_sproc(sp_name) 200 | assert not sqlpy.check_sproc(sp_name) 201 | ``` 202 | 203 | ### Package Management 204 | 205 | ##### Python package management with sqlmlutils is supported in SQL Server 2019 CTP 2.4 and later. 206 | 207 | ##### Install and remove packages from SQL Server 208 | 209 | ```python 210 | import sqlmlutils 211 | 212 | # For Linux SQL Server, you must specify the ODBC Driver and the username/password because there is no Trusted_Connection/Implied Authentication support yet. 
213 | # connection = sqlmlutils.ConnectionInfo(driver="ODBC Driver 13 for SQL Server", server="localhost", database="AirlineTestDB", uid="username", pwd="password") 214 | 215 | connection = sqlmlutils.ConnectionInfo(server="localhost", database="AirlineTestDB") 216 | pkgmanager = sqlmlutils.SQLPackageManager(connection) 217 | pkgmanager.install("astor") 218 | 219 | def import_astor(): 220 | import astor 221 | 222 | # import the astor package to make sure it installed properly 223 | sqlpy = sqlmlutils.SQLPythonExecutor(connection) 224 | val = sqlpy.execute_function_in_sql(import_astor) 225 | 226 | pkgmanager.uninstall("astor") 227 | ``` 228 | 229 | # Notes for Developers 230 | 231 | ### Running the tests 232 | 233 | 1. Make sure a SQL Server with an updated ML Services Python is running on localhost. 234 | 2. Restore the AirlineTestDB from the .bak file in this repo. 235 | 3. Make sure Trusted (Windows) authentication works for connecting to the database. 236 | 4. Set up a user with the db_owner role (and not server admin) with uid "AirlineUser" and password "FakeT3sterPwd!". 237 | 238 | ### Notable TODOs and open issues 239 | 240 | 1. Testing from a Linux client has not been performed. 241 | 2. The way we get dependencies of a package to install is sort of hacky (parsing pip output). 242 | -------------------------------------------------------------------------------- /Python/sqlmlutils/packagemanagement/sqlpackagemanager.py: -------------------------------------------------------------------------------- 1 | # Copyright(c) Microsoft Corporation. 2 | # Licensed under the MIT license.
class SQLPackageManager:
    """Install, uninstall and list Python packages on a SQL Server instance."""

    def __init__(self, connection_info: ConnectionInfo, language_name: str = "Python"):
        """Initialize a SQLPackageManager to manage packages on the SQL Server.

        :param connection_info: The ConnectionInfo object that holds the connection string and other information.
        :param language_name: The name of the language to be executed in sp_execute_external_script,
            if using EXTERNAL LANGUAGE.
        """
        self._connection_info = connection_info
        self._pyexecutor = SQLPythonExecutor(connection_info, language_name=language_name)
        self._language_name = language_name

    def install(self,
                package: str,
                upgrade: bool = False,
                version: str = None,
                install_dependencies: bool = True,
                scope: Scope = None,
                out_file: str = None):
        """Install Python package into a SQL Server Python Services environment using pip.

        :param package: Package name to install on the SQL Server. Can also be a filename; if the value is an
            existing file on the client it is installed from that file, otherwise it is downloaded from PyPI.
        :param upgrade: If True, will update the package if it exists on the specified SQL Server.
            If False, will not try to update an existing package.
        :param version: Package version to install. Only used when installing from PyPI, where it is appended to
            the package name as a ``==version`` pin; ignored when ``package`` is a file.
        :param install_dependencies: If True, installs required dependencies of package (similar to how default
            pip install or conda install works). False not yet supported.
        :param scope: Specifies whether to install packages into private or public scope. When omitted, the scope
            is public if the connecting login is a member of the sysadmin server role and private otherwise.
            Private scope installs packages into a private path for the SQL principal you connect as. If your
            principal has the db_owner role, you can also specify scope as public. This will install packages into
            a public path for all users. Note: if you connect as dbo, you can only install packages into the
            public path.
        :param out_file: INSTEAD of running the actual installation, print the t-sql commands to a text file to use
            as script.
        :raises ValueError: if install_dependencies is False.

        >>> from sqlmlutils import ConnectionInfo, SQLPythonExecutor, SQLPackageManager
        >>> connection = ConnectionInfo(server="localhost", database="AirlineTestDB")
        >>> pyexecutor = SQLPythonExecutor(connection)
        >>> pkgmanager = SQLPackageManager(connection)
        >>>
        >>> def use_tensorflow():
        >>>     import tensorflow as tf
        >>>     node1 = tf.constant(3.0, tf.float32)
        >>>     return str(node1.dtype)
        >>>
        >>> pkgmanager.install("tensorflow")
        >>> ret = pyexecutor.execute_function_in_sql(use_tensorflow)
        >>> pkgmanager.uninstall("tensorflow")

        """
        if not install_dependencies:
            raise ValueError("Dependencies will always be installed - "
                             "single package install without dependencies not yet supported.")
        if scope is None:
            scope = self._get_default_scope()

        if os.path.isfile(package):
            self._install_from_file(package, scope, upgrade, out_file=out_file)
        else:
            self._install_from_pypi(package, upgrade, version, install_dependencies, scope, out_file=out_file)

    def uninstall(self,
                  package_name: str,
                  scope: Scope = None,
                  out_file: str = None):
        """Remove Python package from a SQL Server Python environment.

        Only the named package is removed; its dependencies are left on the server.

        :param package_name: Package name to remove on the SQL Server.
        :param scope: Specifies whether to uninstall packages from private or public scope. When omitted, the scope
            is public if the connecting login is a member of the sysadmin server role and private otherwise.
            Private scope uninstalls packages from a private path for the SQL principal you connect as. If your
            principal has the db_owner role, you can also specify scope as public. This will uninstall packages
            from a public path for all users. Note: if you connect as dbo, you can only uninstall packages from
            the public path.
        :param out_file: INSTEAD of running the actual uninstallation, print the t-sql commands to a text file to
            use as script.
        """
        if scope is None:
            scope = self._get_default_scope()

        print("Uninstalling {package_name} only, not dependencies".format(package_name=package_name))
        self._drop_sql_package(package_name, scope, out_file)

    def list(self):
        """List packages installed on server, similar to output of pip freeze.

        :return: List of tuples, each tuple[0] is package name and tuple[1] is package version.
        """
        return self._pyexecutor.execute_function_in_sql(servermethods.show_installed_packages)

    def _get_default_scope(self):
        """Return the public scope for sysadmin logins and the private scope for everyone else."""
        query = "SELECT IS_SRVROLEMEMBER ('sysadmin') as is_sysadmin"
        is_sysadmin = self._pyexecutor.execute_sql_query(query)["is_sysadmin"].iloc[0]
        return Scope.public_scope() if is_sysadmin == 1 else Scope.private_scope()

    def _get_packages_by_user(self, owner='', scope: Scope = None):
        """Query sys.external_libraries for the libraries owned by a principal in the given scope.

        :param owner: Database user whose libraries are listed. When empty, CURRENT_USER is used for the private
            scope and "dbo" for the public scope.
        :param scope: Scope to look in. Defaults to the private scope.
        :return: Result of the query (columns name, language, scope) as returned by the executor's
            execute_sql_query.
        """
        # Resolve the default here rather than in the signature so that no Scope
        # instance is created at class-definition time and shared between calls.
        if scope is None:
            scope = Scope.private_scope()
        scope_num = 1 if scope == Scope.private_scope() else 0

        if scope_num == 0 and owner == '':
            owner = "dbo"

        # An explicit owner is bound as a query parameter; otherwise the server-side
        # CURRENT_USER is used.
        user_expr = "?" if owner != '' else "CURRENT_USER"

        # NOTE: no trailing "GO" - it is a batch separator understood by sqlcmd/SSMS,
        # not a T-SQL statement, and is a syntax error when sent through ODBC.
        query = (
            "DECLARE @principalId INT; "
            "DECLARE @currentUser NVARCHAR(128); "
            "SELECT @currentUser = " + user_expr + ";\n"
            "SELECT @principalId = USER_ID(@currentUser); "
            "SELECT name, language, scope "
            "FROM sys.external_libraries AS elib "
            "WHERE elib.principal_id=@principalId "
            "AND elib.language='{language_name}' AND elib.scope={scope_num} "
            "ORDER BY elib.name ASC;"
        ).format(language_name=self._language_name,
                 scope_num=scope_num)
        return self._pyexecutor.execute_sql_query(query, owner)

    def _drop_sql_package(self, sql_package_name: str, scope: Scope, out_file: str = None):
        """Drop the external library named sql_package_name from the given scope."""
        builder = DropLibraryBuilder(sql_package_name=sql_package_name, scope=scope, language_name=self._language_name)
        execute_query(builder, self._connection_info, out_file)

    # TODO: Support not dependencies
    def _install_from_pypi(self,
                           target_package: str,
                           upgrade: bool = False,
                           version: str = None,
                           install_dependencies: bool = True,
                           scope: Scope = None,
                           out_file: str = None):
        """Download target_package from PyPI into a temporary directory and install it from the downloaded file.

        :param target_package: Name of the package to download.
        :param upgrade: Forwarded to _install_from_file.
        :param version: If given, the download is pinned to ``target_package==version``.
        :param install_dependencies: Must be True; False is not yet supported.
        :param scope: Scope to install into. Defaults to the private scope.
        :param out_file: Forwarded to _install_from_file.
        :raises ValueError: if install_dependencies is False.
        """
        if not install_dependencies:
            raise ValueError("Dependencies will always be installed - "
                             "single package install without dependencies not yet supported.")

        if scope is None:
            scope = Scope.private_scope()

        if version is not None:
            target_package = target_package + "==" + version

        # The downloaded file lives in the temporary directory, so the install
        # has to finish before the directory is cleaned up.
        with tempfile.TemporaryDirectory() as temporary_directory:
            pipdownloader = PipDownloader(self._connection_info, temporary_directory, target_package,
                                          language_name=self._language_name)
            target_package_file = pipdownloader.download_single()
            self._install_from_file(target_package_file, scope, upgrade, out_file=out_file)

    def _install_from_file(self, target_package_file: str, scope: Scope, upgrade: bool = False, out_file: str = None):
        """Install a package file and whichever of its dependencies the server still needs.

        Returns without installing anything when the resolver reports that the server
        already satisfies the requirement for this package.
        """
        name = get_package_name_from_file(target_package_file)
        version = get_package_version_from_file(target_package_file)

        resolver = DependencyResolver(self.list(), name)
        if resolver.requirement_met(upgrade, version):
            serverversion = resolver.get_target_server_version()
            print(messages.no_upgrade(name, serverversion, version))
            return

        # Download requirements from PyPI
        with tempfile.TemporaryDirectory() as temporary_directory:
            pipdownloader = PipDownloader(self._connection_info, temporary_directory, target_package_file,
                                          language_name=self._language_name)

            # For now, we download all target package dependencies from PyPI.
            target_package_requirements, requirements_downloaded = pipdownloader.download()

            # Resolve which package dependencies need to be installed or upgraded on server.
            required_installs = resolver.get_required_installs(target_package_requirements)
            dependencies_to_install = self._get_required_files_to_install(requirements_downloaded, required_installs)
            self._install_many(target_package_file, dependencies_to_install, scope, out_file=out_file)

    def _install_many(self, target_package_file: str, dependency_files, scope: Scope, out_file: str = None):
        """Install the dependency files and then the target package inside a single transaction.

        :raises RuntimeError: if any step fails; the transaction is rolled back first and the original
            exception is chained as the cause.
        """
        with SQLQueryExecutor(connection=self._connection_info) as sqlexecutor:
            # Commit manually so that a failure part-way through leaves nothing behind.
            sqlexecutor._cnxn.autocommit = False
            try:
                print("Installing dependencies...")
                for pkgfile in dependency_files:
                    self._install_single(sqlexecutor, pkgfile, scope, out_file=out_file)

                print("Done with dependencies, installing main package...")
                self._install_single(sqlexecutor, target_package_file, scope, True, out_file=out_file)
                sqlexecutor._cnxn.commit()
            except Exception as e:
                sqlexecutor._cnxn.rollback()
                raise RuntimeError("Package installation failed, installed dependencies were rolled back.") from e

    def _install_single(self, sqlexecutor: SQLQueryExecutor, package_file: str, scope: Scope, is_target=False,
                        out_file: str = None):
        """Zip one package file, create the external library from it and run the library check.

        :param is_target: Accepted for callers' convenience; currently unused.
        """
        name = str(get_package_name_from_file(package_file))
        version = str(get_package_version_from_file(package_file))
        print("Installing {name} version: {version}".format(name=name, version=version))

        with tempfile.TemporaryDirectory() as temporary_directory:
            # The package file is wrapped in a zip archive, which is what gets
            # handed to the CREATE builder.
            prezip = os.path.join(temporary_directory, name + "PREZIP.zip")
            with zipfile.ZipFile(prezip, 'w') as zipf:
                zipf.write(package_file, os.path.basename(package_file))

            builder = CreateLibraryBuilder(pkg_name=name, pkg_filename=prezip, scope=scope,
                                           language_name=self._language_name)
            sqlexecutor.execute(builder, out_file=out_file)

            builder = CheckLibraryBuilder(pkg_name=name, scope=scope, language_name=self._language_name)
            sqlexecutor.execute(builder, out_file=out_file)

    @staticmethod
    def _get_required_files_to_install(pkgfiles, requirements):
        """Return the entries of pkgfiles whose package name appears in requirements."""
        return [file for file in pkgfiles
                if SQLPackageManager._pkgfile_in_requirements(file, requirements)]

    @staticmethod
    def _pkgfile_in_requirements(pkgfile: str, requirements):
        """Return True if the package in pkgfile matches a requirement after lower-casing and name cleaning."""
        pkgname = get_package_name_from_file(pkgfile)
        cleaned_pkgname = DependencyResolver.clean_requirement_name(pkgname.lower())
        return any(cleaned_pkgname == DependencyResolver.clean_requirement_name(req.lower())
                   for req in requirements)
class SQLPythonExecutor:
    """Run Python functions, scripts, queries and stored procedures against a SQL Server."""

    def __init__(self, connection_info: ConnectionInfo, language_name: str = "Python"):
        """Initialize a PythonExecutor to execute functions or queries in SQL Server.

        :param connection_info: The ConnectionInfo object that holds the connection string and other information.
        :param language_name: The name of the language to be executed in sp_execute_external_script,
            if using EXTERNAL LANGUAGE.
        """
        self._connection_info = connection_info
        self._language_name = language_name

    def execute_function_in_sql(self,
                                func: Callable, *args,
                                input_data_query: str = "",
                                **kwargs):
        """Execute a function in SQL Server.

        Anything the function wrote to stdout / stderr on the server is echoed to the
        local stdout / stderr.

        :param func: function to execute_function_in_sql. NOTE: This function is shipped to SQL as text.
            Functions should be self contained and import statements should be inline.
        :param args: positional args to pass to function to execute_function_in_sql.
        :param input_data_query: sql query to fill the first argument of the function. The argument gets the result of
            the query as a pandas DataFrame (uses the @input_data_1 parameter in sp_execute_external_script)
        :param kwargs: keyword arguments to pass to function to execute_function_in_sql.
        :return: value returned by func

        >>> from sqlmlutils import ConnectionInfo, SQLPythonExecutor
        >>>
        >>> def foo(val1, val2):
        >>>     import math
        >>>     print(val1)
        >>>     return [math.cos(val2), math.cos(val2)]
        >>>
        >>> sqlpy = SQLPythonExecutor(ConnectionInfo("localhost", database="AirlineTestDB"))
        >>> ret = sqlpy.execute_function_in_sql(foo, val1="blah", val2=5)
        blah
        >>> print(ret)
        [0.28366218546322625, 0.28366218546322625]
        """
        df, _ = execute_query(SpeesBuilderFromFunction(func,
                                                       self._language_name,
                                                       input_data_query,
                                                       *args,
                                                       **kwargs),
                              self._connection_info)

        results, output, error = self._get_results(df)

        if output is not None:
            print(output)
        if error is not None:
            print(error, file=sys.stderr)
        return results

    def execute_script_in_sql(self,
                              path_to_script: str,
                              input_data_query: str = ""):
        """Execute a script in SQL Server.

        :param path_to_script: file path to Python script to execute.
        :param input_data_query: sql query to fill InputDataSet global variable with.
            (@input_data_1 parameter in sp_execute_external_script)
        :return: None
        :raises FileNotFoundError: if path_to_script does not exist.

        """
        try:
            with open(path_to_script, 'r') as script_file:
                content = script_file.read()
        except FileNotFoundError:
            raise FileNotFoundError("File does not exist!")
        execute_query(SpeesBuilder(content, input_data_query=input_data_query, language_name=self._language_name),
                      connection=self._connection_info)

    def execute_sql_query(self,
                          sql_query: str,
                          params=()):
        """Execute a sql query in SQL Server.

        :param sql_query: the sql query to execute in the server
        :param params: parameter value(s) for the query, forwarded unchanged to execute_raw_query
        :return: table returned by the sql_query
        """
        df, _ = execute_raw_query(conn=self._connection_info, query=sql_query, params=params)
        return df

    def create_sproc_from_function(self, name: str, func: Callable,
                                   input_params: dict = None, output_params: dict = None):
        """Create a SQL Server stored procedure based on a Python function.
        NOTE: Type annotations are needed either in the function definition or in the input_params dictionary
        WARNING: Output parameters can be used when creating the stored procedure, but Stored Procedures with
        output parameters other than a single DataFrame cannot be executed with sqlmlutils

        :param name: name of stored procedure.
        :param func: function used to define stored procedure. parameters to the function are used to define parameters
            to the stored procedure. type annotations of the parameters are used to infer SQL types of parameters to the
            stored procedure. currently supported type annotations are "str", "int", "float", and "DataFrame".
        :param input_params: optional dictionary of type annotations for each argument to func;
            if func has type annotations this is not necessary. If both are provided, they must match
        :param output_params: optional dictionary of type annotations for each output parameter
        :return: True if creation succeeded

        >>> from sqlmlutils import ConnectionInfo, SQLPythonExecutor
        >>>
        >>> def foo(val1: int, val2: str):
        >>>     from pandas import DataFrame
        >>>     print(val2)
        >>>     df = DataFrame()
        >>>     df["col1"] = [val1, val1, val1]
        >>>     return df
        >>>
        >>> sqlpy = SQLPythonExecutor(ConnectionInfo("localhost", database="AutoRegressTestDB"))
        >>> sqlpy.create_sproc_from_function("MyStoredProcedure", foo)
        >>>
        >>> # You can execute_function_in_sql the procedure in the usual way from sql: exec MyStoredProcedure 5, 'bar'
        >>> # You can also call the stored procedure from Python
        >>> ret = sqlpy.execute_sproc(name="MyStoredProcedure", val1=5, val2="bar")
        >>> sqlpy.drop_sproc(name="MyStoredProcedure")

        """
        # We modify input_params/output_params because we add stdout and stderr as params.
        # We copy here to avoid modifying the caller's dictionaries.
        in_copy = {} if input_params is None else input_params.copy()
        out_copy = {} if output_params is None else output_params.copy()

        # Save the stored procedure in database
        execute_query(StoredProcedureBuilderFromFunction(name=name,
                                                         func=func,
                                                         input_params=in_copy,
                                                         output_params=out_copy,
                                                         language_name=self._language_name),
                      self._connection_info)
        return True

    def create_sproc_from_script(self, name: str, path_to_script: str,
                                 input_params: dict = None, output_params: dict = None):
        """Create a SQL Server stored procedure based on a Python script

        :param name: name of stored procedure.
        :param path_to_script: file path to Python script to create a sproc from.
        :param input_params: optional dictionary of type annotations for inputs in the script
        :param output_params: optional dictionary of type annotations for each output variable
        :return: True if creation succeeded
        :raises FileNotFoundError: if path_to_script does not exist.

        >>> from sqlmlutils import ConnectionInfo, SQLPythonExecutor
        >>>
        >>>
        >>> sqlpy = SQLPythonExecutor(ConnectionInfo("localhost", database="AutoRegressTestDB"))
        >>> sqlpy.create_sproc_from_script(name="script_sproc", path_to_script="path/to/script")
        >>>
        >>> # This will execute the script in sql; with no inputs or outputs it will just run and return nothing
        >>> sqlpy.execute_sproc(name="script_sproc")
        >>> sqlpy.drop_sproc(name="script_sproc")

        """
        try:
            with open(path_to_script, 'r') as script_file:
                content = script_file.read()
            print("File does exist, using " + path_to_script)
        except FileNotFoundError:
            raise FileNotFoundError("File does not exist!")

        # We modify input_params/output_params because we add stdout and stderr as params.
        # We copy here to avoid modifying the caller's dictionaries.
        in_copy = {} if input_params is None else input_params.copy()
        out_copy = {} if output_params is None else output_params.copy()

        # Save the stored procedure in database
        execute_query(StoredProcedureBuilder(name=name,
                                             script=content,
                                             input_params=in_copy,
                                             output_params=out_copy,
                                             language_name=self._language_name),
                      self._connection_info)
        return True

    def check_sproc(self, name: str) -> bool:
        """Check to see if a SQL Server stored procedure exists in the database.

        >>> from sqlmlutils import ConnectionInfo, SQLPythonExecutor
        >>>
        >>> sqlpy = SQLPythonExecutor(ConnectionInfo("localhost", database="AutoRegressTestDB"))
        >>> if sqlpy.check_sproc("MyStoredProcedure"):
        >>>     print("MyStoredProcedure exists")
        >>> else:
        >>>     print("MyStoredProcedure does not exist")

        :param name: name of stored procedure.
        :return: boolean whether the Stored Procedure exists in the database
        """
        from pandas import isna

        # OBJECT_ID(..., N'P') yields NULL when no stored procedure has that name.
        check_query = "SELECT OBJECT_ID (?, N'P')"
        rows = execute_raw_query(conn=self._connection_info, query=check_query, params=name)[0]
        object_id = rows.loc[0].iloc[0]
        # A SQL NULL can reach us as None or as NaN depending on the column dtype of
        # the returned frame; treat both as "does not exist".
        return object_id is not None and not isna(object_id)

    def execute_sproc(self, name: str, output_params: dict = None, **kwargs) -> tuple:
        """Call a stored procedure on a SQL Server database.
        WARNING: Output parameters can be used when creating the stored procedure, but Stored Procedures with
        output parameters other than a single DataFrame cannot be executed with sqlmlutils

        :param name: name of stored procedure
        :param output_params: output parameters (if any) for the stored procedure
        :param kwargs: keyword arguments to pass to stored procedure
        :return: tuple with a DataFrame representing the output data set of the stored procedure
            and a dictionary of output parameters
        """
        # We modify output_params because we add stdout and stderr as output params.
        # We copy here to avoid modifying the caller's dictionary.
        out_copy = output_params.copy() if output_params is not None else None
        return execute_query(ExecuteStoredProcedureBuilder(name, out_copy, **kwargs),
                             self._connection_info)

    def drop_sproc(self, name: str):
        """Drop a SQL Server stored procedure if it exists.

        :param name: name of stored procedure.
        :return: None
        """
        if self.check_sproc(name):
            execute_query(DropStoredProcedureBuilder(name), self._connection_info)

    @staticmethod
    def _get_results(df: DataFrame):
        """Unpack the first row of a result frame into (return value, stdout text, stderr text).

        The return value is stored as a hex string of a dill-serialized object.
        """
        hexstring = df[RETURN_COLUMN_NAME][0]
        stdout_string = df[STDOUT_COLUMN_NAME][0]
        stderr_string = df[STDERR_COLUMN_NAME][0]
        # NOTE(review): dill.loads executes arbitrary code embedded in the payload, so
        # this is only safe when the SQL Server producing the result is trusted.
        return dill.loads(bytes.fromhex(hexstring)), stdout_string, stderr_string