├── .gitattributes ├── .github └── workflows │ └── iris-contest-workflows.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── LICENSE ├── README.md ├── docker-compose.yml ├── iris-aa-server ├── Dockerfile ├── README.md ├── data │ ├── Campaign.csv │ ├── HateSpeech.xml │ ├── IrisDataset.xml │ ├── Loader.xml │ ├── LoanPerformance.xml │ ├── NLPUtils.cls │ ├── appointment-noshows.csv │ ├── breast-cancer.csv │ ├── hate-speech.tar │ ├── loans.gof │ ├── readmission.csv │ └── titanic.csv ├── iris.script └── src │ └── Util │ └── Loader.cls ├── jupyter-samples ├── ED_visit_90_day.ipynb ├── biomedical-integratedml-PyODBC.ipynb ├── campaign-integratedml-jdbc.ipynb ├── intersystems-jdbc-3.1.0.jar ├── libirisodbcu35.so ├── odbc.ini ├── odbcinst.ini └── readmission-integratedml-jdbc.ipynb └── tf2-jupyter-jdbc ├── Dockerfile └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | *.cls linguist-language=ObjectScript 2 | *.mac linguist-language=ObjectScript 3 | *.int linguist-language=ObjectScript 4 | *.inc linguist-language=ObjectScript 5 | *.csp linguist-language=Html 6 | 7 | *.sh text eol=lf 8 | *.cls text eol=lf 9 | *.mac text eol=lf 10 | *.int text eol=lf 11 | *.inc text eol=lf 12 | Dockerfil* text eol=lf 13 | -------------------------------------------------------------------------------- /.github/workflows/iris-contest-workflows.yml: -------------------------------------------------------------------------------- 1 | name: objectscriptquality 2 | on: push 3 | 4 | jobs: 5 | linux: 6 | name: Linux build 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Execute ObjectScript Quality Analysis 11 | run: wget https://raw.githubusercontent.com/litesolutions/objectscriptquality-jenkins-integration/master/iris-community-hook.sh && sh ./iris-community-hook.sh 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "type": "objectscript", 6 | "request": "launch", 7 | "name": "ObjectScript Debug Class", 8 | "program": "##class(PackageSample.ObjectScript).Test()", 9 | }, 10 | { 11 | "type": "objectscript", 12 | "request": "attach", 13 | "name": "ObjectScript Attach", 14 | "processId": "${command:PickProcess}", 15 | "system": true 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | 4 | "iris.script": "objectscript", 5 | }, 6 | "objectscript.conn" :{ 7 | "ns": "USER", 8 | "username": "superuser", 9 | "active": true, 10 | "docker-compose": { 11 | "service": "irisimlsvr", 12 | "internalPort": 52773 13 | }, 14 | "links": { 15 | "IRIS IntegratedML Jupyter": "http://localhost:8896/tree", 16 | "Webterminal": "http://localhost:8092/terminal/" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 InterSystems Developer Community 4 | 5 | Permission is hereby granted, free 
of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 |
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 |
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | SOFTWARE.
 22 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
 1 | # integratedml-demo-template
 2 | This is a GitHub template repository for InterSystems IRIS IntegratedML.
 3 |
 4 | This repository comes with a few example Jupyter notebooks (http://jupyter.org) that demonstrate how to use IntegratedML in InterSystems IRIS Community Edition (Advanced Analytics including IntegratedML) in a Docker container.
 5 |
 6 | ## Contents
 7 | * [What is IntegratedML?](#what-is-integratedml)
 8 | * [What's inside this template](#whats-inside-this-template)
 9 | * [Pre-configured environment and sample data](#pre-configured-environment-and-sample-data)
 10 | * [Sample notebooks to get you started](#sample-notebooks-to-get-you-started)
 11 | * [Demo environment topology](#demo-environment-topology)
 12 | * [Prerequisites](#prerequisites)
 13 | * [Tested environments](#tested-environments)
 14 | * [Installation](#installation)
 15 | * [How to develop your IntegratedML solution with the IntegratedML Template Repo](#how-to-develop-your-integratedml-solution-with-the-integratedml-template-repository)
 16 | * [Use this template](#use-this-template)
 17 | * [Check out the repo](#check-out-the-repo)
 18 | * [Start developing](#start-developing)
 19 | * [How to Import data into InterSystems IRIS](#how-to-import-data-into-intersystems-iris)
 20 | * [Importing data from CSV file](#importing-data-from-csv-file)
 21 | * [Importing data from CSV URL](#importing-data-from-csv-url)
 22 |
 23 | ## What is IntegratedML?
 24 | IntegratedML is a feature of the InterSystems IRIS data platform that brings machine learning to SQL developers.
 25 |
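The complete build / train / predict cycle is plain SQL. As a first taste, these are the same three statements that [iris-aa-server/README.md](iris-aa-server/README.md) runs against the bundled iris dataset:

```sql
CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset;
TRAIN MODEL Flowers FROM DataMining.IrisDataset;
SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species AS ActualSpecies FROM DataMining.IrisDataset;
```

(The trailing semicolons are for multi-statement SQL clients such as DBeaver; see that README for details.)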
28 | 29 | IntegratedML is 30 | - all-SQL -- Build and train machine learning models using intuitive custom SQL commands, fully integrated within the InterSystems IRIS SQL processor 31 | - turnkey -- no packages or programming languages to learn, nothing to install 32 | - modular -- leverages "best of breed" open source and proprietary AutoML frameworks 33 | 34 | Learn more about InterSystems IRIS and IntegratedML at the [InterSystems Learning site](https://learning.intersystems.com/course/view.php?name=Learn%20IntegratedML) 35 | 36 | ## What's inside this template 37 | 38 | ### Pre-configured environment, and sample data 39 | This template creates a docker environment (via "docker-compose up") of 2 pre-configured containers: 40 | 1. tf2jupyter: Jupyter+Tensorflow2.2(without GPU), with a few sample notebook files (in its Dockerfile) 41 | 2. irisimlsvr another one for an IRIS 2020.3 Community Edition, with pre-loaded sample data in USER namespace(see its [Dockerfile](iris-aa-server/Dockerfile) and [iris.script](iris-aa-server/iris.script) that is run at startup) 42 | 43 | ### Sample notebooks to get you started 44 | 4 sample notebook files -- by default this template starts Jupyter at http://localhost:8896/tree : 45 | - [campaign-integratedml-jdbc.ipynb](jupyter-samples/campaign-integratedml-jdbc.ipynb): A simple JDBC connection from tf2jupyter into a sample data table (Marketing Campaign data) within InterSystems IRIS's USER namespace, showing some use of IntegratedML including VALIDATE MODEL command usage. 46 | - [readmission-integratedml-jdbc.ipynb](jupyter-samples/readmission-integratedml-jdbc.ipynb): Demonstrates use of IntegratedML on a hospital readmission prediction dataset. 47 | - [biomedical-integratedml-PyODBC.ipynb](jupyter-samples/biomedical-integratedml-PyODBC.ipynb): Connection to InterSystems IRIS server over PyODBC, building and using an IntegratedML machine learning model, with a complex SQL query using the PREDICT() and PROBABILITY() IntegratedML SQL functions. 48 | - [ED_visit_90_day.ipynb](jupyter-samples/ED_visit_90_day.ipynb): Building and using an IntegratedML machine learning model to predict visits to Emergency Department, utilizing data from a Health Insight server, kindly provided by Joseph Cofone at Baystate Health. *NOTE: this notebook is not executable!* 49 | 50 | ## Demo environment topology 51 |
52 | *(diagram: the tf2jupyter notebook container connects over JDBC/ODBC to the irisimlsvr IRIS container; see [docker-compose.yml](docker-compose.yml) for the port mappings)*
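Below is a minimal sketch of how the JDBC-based sample notebooks open their connection across this topology, from tf2jupyter to irisimlsvr. The hostname, container port, USER namespace, SuperUser/SYS credentials and the intersystems-jdbc-3.1.0.jar path are the ones this template configures (docker-compose.yml, iris.script, jupyter-samples/); the model and table names reuse the iris dataset example from iris-aa-server/README.md, so treat it as an illustration rather than a verbatim copy of any one notebook:

```python
# Sketch only: names/paths below are assumptions taken from this template's
# docker-compose.yml, iris.script, and the jupyter-samples folder.
import jaydebeapi
import pandas as pd

conn = jaydebeapi.connect(
    "com.intersystems.jdbc.IRISDriver",        # InterSystems IRIS JDBC driver class
    "jdbc:IRIS://irisimlsvr:1972/USER",        # jdbc:IRIS://host:port/namespace
    ["SuperUser", "SYS"],                      # demo credentials set up by iris.script
    "./intersystems-jdbc-3.1.0.jar")           # driver jar shipped in jupyter-samples/

curs = conn.cursor()

# IntegratedML statements are ordinary SQL statements:
curs.execute("CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset")
curs.execute("TRAIN MODEL Flowers FROM DataMining.IrisDataset")

# ...and predictions come back as a normal result set:
df = pd.read_sql("SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species"
                 " FROM DataMining.IrisDataset", conn)
print(df)
```

Note that inside the Docker network the notebooks address IRIS by its service hostname and internal superserver port (irisimlsvr:1972); the 8091 mapping in docker-compose.yml is only for clients running on the host.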
54 | 55 | ## Prerequisites 56 | Make sure you have [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) and [Docker desktop](https://www.docker.com/products/docker-desktop) installed. 57 | 58 | ## Tested environments 59 | This template is tested breifly on AWS Ubuntu, Mac OS, and Windows 10(using Docker Toolbox only). It should work on other Docker environment too - let us know if you encounter any issues. 60 | 61 | ## Installation 62 | 63 | Clone/git pull the repo into any local directory 64 | 65 | ``` 66 | $ git clone https://github.com/intersystems-community/integratedml-demo-template.git 67 | ``` 68 | 69 | Open a Docker terminal in this directory and run: 70 | 71 | ``` 72 | $ docker-compose build 73 | ``` 74 | 75 | 3. Run the IRIS container, and Jupyter notebook server images: 76 | 77 | ``` 78 | $ docker-compose up -d 79 | ``` 80 | 81 | 4. Open browser to access the notebooks 82 | 83 | ``` 84 | http://localhost:8896/tree 85 | ``` 86 | Note: use `docker-compose ps` to confirm tf2juyter's ports; make sure right localhost port is used if over SSL tunneling to remotehost) 87 | 88 | 5. Examine the test data with webterminal 89 | Open terminal with: SuperUser / SYS credentials 90 | ``` 91 | http://localhost:8092/terminal/ 92 | ``` 93 | Enter **/sql** mode and make SQL queries to examine data in IRIS. 94 | Screenshot 2023-10-28 at 12 08 44 AM 95 | 96 | # How to develop your IntegragedML solution with the IntegratedML Template Repository 97 | ## Use this template 98 | Click the button "Use this template" on Github to create a new repository which will be the copy of this one. 99 | 100 | ## Checkout the repo 101 | Clone your new repo to a local folder. 102 | 103 | ## Start developing 104 | Install [VSCode](https://code.visualstudio.com/), [Docker Desctop](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-docker) and [ObjectScript](https://marketplace.visualstudio.com/items?itemName=daimor.vscode-objectscript) plugin and open the folder in VSCode. 105 | 106 | Import your data as listed below, rebuild containers to let the data be imported, and use IntegratedML via SQL tools, as described in Jupyter notebooks. 107 | 108 | 109 | # How to Import data into InterSystems IRIS 110 | ## Importing data from CSV file 111 | 1. Add csv file into the repository, e.g. like [this titanic.csv](https://github.com/intersystems-community/integratedml-demo-template/blob/master/iris-aa-server/data/titanic.csv) 112 | 2. Introduce an import data call into your IRIS initalisation script. 113 | This is an [example line to import titanic.csv](https://github.com/intersystems-community/integratedml-demo-template/blob/0db187b7fd127ff5432b68617bca7cfdadaaed2b/iris-aa-server/iris.script#L13) into IRIS Titanic.Passenger class along with data. 114 | 3. Query the data from any SQL tool, web terminal or from InterSystems ObjectScript with: 115 | ``` 116 | SELECT * From Titanic.Passenger 117 | ``` 118 | ## Importing data from CSV URL 119 | If your file is accessible remotely, you can import it as follows: 120 | 1. Add the import CSV from URL line into [iris.script](). 121 | Here is an example line to [import countries.csv data from URL](https://github.com/intersystems-community/integratedml-demo-template/blob/7feaffef0a47c7c46cc683d89bdbaedbce48071c/iris-aa-server/iris.script#L17) 122 | 2. Rebuild the docker image (the easiest way is to rebuild via _docker-compose_ -- ```docker-compose build```). 
This will create User.Countries class and import data which you can query with SQL from Countries table: 123 | ``` 124 | SELECT * FROM COUNTRIES 125 | ``` 126 | 127 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | 5 | irisimlsvr: 6 | build: 7 | context: ./iris-aa-server 8 | image: iris-aa-server:2021.2 9 | hostname: irisimlsvr 10 | restart: on-failure 11 | ports: 12 | - 8091:1972 # 1972 is the superserver default port 13 | - 8092:52773 # 52773 is the webserver/management portal port 14 | volumes: 15 | - ./iris-shared:/shared 16 | tf2jupyter: # tensorflow with jupyter 17 | build: 18 | context: ./tf2-jupyter-jdbc 19 | image: tf2-jupyter-jdbc:1.0.0-iml-template 20 | hostname: tf2jupyter 21 | restart: on-failure 22 | ports: 23 | - 8896:8888 # 8888 is the docker jupyter service port 24 | - 6026:6006 # 6006 is the tensorboard port 25 | volumes: 26 | - ./jupyter-samples:/tf #shared volumes 27 | 28 | 29 | -------------------------------------------------------------------------------- /iris-aa-server/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG IMAGE=store/intersystems/iris-aa-community:2020.3.0AA.331.0 2 | ARG IMAGE=intersystemsdc/iris-aa-community:2020.3.0AA.331.0-zpm 3 | ARG IMAGE=intersystemsdc/iris-ml-community:2021.2.0.651.0-zpm 4 | ARG IMAGE=intersystemsdc/iris-ml-community 5 | FROM $IMAGE 6 | LABEL maintainer="Thomas Dyar " 7 | 8 | USER root 9 | RUN mkdir /data \ 10 | && chown irisowner /data 11 | USER irisowner 12 | 13 | # copy files 14 | COPY data /data 15 | COPY src /data/src 16 | COPY iris.script /tmp/iris.script 17 | 18 | # special extract treatment for hate-speech dataset 19 | # RUN mkdir /data/hate-speech/ \ 20 | # && tar -xf /data/hate-speech.tar -C /data/ 21 | 22 | # load demo stuff 23 | RUN iris start IRIS \ 24 | && iris session IRIS < /tmp/iris.script 25 | 26 | # RUN rm -r /data/* 27 | -------------------------------------------------------------------------------- /iris-aa-server/README.md: -------------------------------------------------------------------------------- 1 | # IRIS Advanced Analytics with IntegratedML Demo 2 | 3 | This folder contains a few simple datasets to demonstrate InterSystems IRIS IntegratedML (previously known as QuickML). The enclosed Dockerfile can be used separately from the rest of the integratedml-demo-template, if you do not want to use the Jupyter Notebook interface. 4 | 5 | ## How to build 6 | 7 | The included Dockerfile will pull the IRIS Advanced Analytics Community Edition (with IntegratedML) container image from the InterSystems public Docker repository, and set up a few simple datasets. 8 | 9 | ``` 10 | docker build --tag integratedml-demo . 
11 | ``` 12 | 13 | To start your container, use the following command (or your favourite equivalent, as this one will drop your container after stopping) 14 | 15 | ``` 16 | docker run --rm -d -p 9091:51773 -p 9092:52773 --name integratedml integratedml-demo 17 | ``` 18 | 19 | The IRIS password is initialized as SYS, but you can get in directly through the following command, the SMP or connecting through a SQL client such as [DBeaver](https://dbeaver.io/) 20 | 21 | ``` 22 | docker exec -it integratedml iris sql IRIS 23 | ``` 24 | 25 | ## How to demo 26 | 27 | Using IntegratedML takes only three simple commands: 28 | 29 | ```sql 30 | CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset; 31 | TRAIN MODEL Flowers FROM DataMining.IrisDataset; 32 | SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species AS ActualSpecies FROM DataMining.IrisDataset; 33 | ``` 34 | 35 | Note that the semicolons at the end are for use in a multiline-style client such as DBeaver or SQuirreL and not part of regular IRIS SQL. See the [IntegratedML Syntax overview](https://usconfluence.iscinternal.com/display/TBD/IntegratedML+Syntax) if you want to be more creative. For example, you can add ```USING { "provider": "H2O" }``` to your CREATE or TRAIN commands to test the H2O provider instead of the default one. 36 | 37 | ### Included datasets 38 | 39 | These are broadly available datasets, but we may not have permission to _re_-distribute them, so keep this repo to yourself: 40 | - \[SQLUser.\]Campaign: as used in the campaign showcase in the [ML Toolkit](https://github.com/intersystems/MLToolkit). The target column to put your crosshairs on is RESPONSE 41 | - \[SQLUser.\]BreastCancer 42 | -------------------------------------------------------------------------------- /iris-aa-server/data/HateSpeech.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %Persistent 5 | 65407,55491.800962 6 | 65407,46597.061384 7 | 8 | 9 | %String 10 | 11 | 12 | 13 | 14 | %String 15 | 16 | 17 | 18 | %String 19 | 20 | 21 | 22 | %String 23 | 24 | 25 | 26 | %Integer 27 | 28 | 29 | 30 | 34 | 35 | 36 | 37 | %String 38 | 39 | 40 | 41 | %String 42 | 43 | 44 | 45 | 1 46 | pDir:%String="C:\Users\bdeboe\Documents\GitHub\hate-speech-dataset\" 47 | %Status 48 | WC") 62 | set tRow = ..%New() 63 | set tRow.FileName = $piece(tLine,",",1)_".txt" 64 | set tRow.UserId = $piece(tLine,",",2) 65 | set tRow.SubforumId = $piece(tLine,",",3) 66 | set tRow.ContextNeeded = $piece(tLine,",",4) 67 | set tRow.Label = $piece(tLine,",",5) 68 | 69 | set tCommentFile = ##class(%Stream.FileCharacter).%New() 70 | set tSC = tCommentFile.LinkToFile(pDir_"all_files"_tSeparator_tRow.FileName) 71 | quit:$$$ISERR(tSC) 72 | set tRow.Comment = tCommentFile.Read() 73 | 74 | if ##class(%File).Exists(pDir_"sampled_train"_tSeparator_tRow.FileName) { 75 | set tRow.Sample = "train" 76 | } elseif ##class(%File).Exists(pDir_"sampled_test"_tSeparator_tRow.FileName) { 77 | set tRow.Sample = "test" 78 | } 79 | 80 | set tSC = tRow.%Save() 81 | quit:$$$ISERR(tSC) 82 | } 83 | 84 | } catch (ex) { 85 | set tSC = ex.AsStatus() 86 | } 87 | quit tSC 88 | ]]> 89 | 90 | 91 | 92 | %Storage.Persistent 93 | ^NLP.HateSpeechD 94 | HateSpeechDefaultData 95 | ^NLP.HateSpeechD 96 | ^NLP.HateSpeechI 97 | ^NLP.HateSpeechS 98 | 99 | 100 | %%CLASSNAME 101 | 102 | 103 | Comment 104 | 105 | 106 | FileName 107 | 108 | 109 | UserId 110 | 111 | 112 | SubforumId 113 | 114 | 115 | ContextNeeded 116 | 117 | 118 | Label 119 | 120 | 121 | Sample 122 | 123 | 
124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /iris-aa-server/data/IrisDataset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. Each record has 5 attributes: 8 |
    9 |
  1. sepal length in cm 10 |
  2. sepal width in cm 11 |
  3. petal length in cm 12 |
  4. petal width in cm 13 |
  5. class (species): 14 |
      15 |
    • Iris Setosa 16 |
    • Iris Versicolour 17 |
    • Iris Virginica 18 |
    19 |
20 |

21 | The dataset is taken from UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science by Frank, A. & Asuncion, A. (2010).]]> 22 | %Persistent 23 | 64926,50261.117 24 | 62312,41576.508001 25 | 26 | 27 | %Double 28 | 29 | 30 | 31 | %Double 32 | 33 | 34 | 35 | %Double 36 | 37 | 38 | 39 | %Double 40 | 41 | 42 | 43 | %String 44 | 45 | 46 | 47 | 1 48 | %Status 49 | " 71 | 72 | set count = $i(count) 73 | 74 | set flower = ..%New() 75 | set flower.SepalLength = $p(line,",",1) 76 | set flower.SepalWidth = $p(line,",",2) 77 | set flower.PetalLength = $p(line,",",3) 78 | set flower.PetalWidth = $p(line,",",4) 79 | set flower.Species = $p(line,",",5) 80 | 81 | set sc = flower.%Save() 82 | Quit:$$$ISERR(sc) 83 | } 84 | 85 | } Catch (ex) { 86 | set sc = ex.AsStatus() 87 | } 88 | 89 | if ($$$ISERR(sc)) { 90 | do $system.OBJ.DisplayError(sc) 91 | } 92 | 93 | Quit sc 94 | ]]> 95 | 96 | 97 | 98 | 100 | 252 | 253 | ]]> 254 | 255 | 256 | 257 | %Storage.Persistent 258 | ^DataMining.IrisDatasetD 259 | IrisDatasetDefaultData 260 | ^DataMining.IrisDatasetD 261 | ^DataMining.IrisDatasetI 262 | ^DataMining.IrisDatasetS 263 | 264 | listnode 265 | 266 | 267 | %%CLASSNAME 268 | 269 | 270 | SepalLength 271 | 272 | 273 | SepalWidth 274 | 275 | 276 | PetalLength 277 | 278 | 279 | PetalWidth 280 | 281 | 282 | Species 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | DataMining.IrisDataset.]]> 292 | 1 293 | %DeepSee.PMML.Definition 294 | 65204,51678.214 295 | 62888,42919.529995 296 | 297 | 298 | http://www.intersystems.com/deepsee/pmml 299 | 301 |

302 | 303 | 03/11/2013 11:54:41 304 |
305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | SELECT ID, PetalLength, PetalWidth, SepalLength, SepalWidth, Species FROM DataMining.IrisDataset 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | ]]> 444 | 445 |
446 |
447 | -------------------------------------------------------------------------------- /iris-aa-server/data/Loader.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 65330,70445.358752 5 | 65330,67823.562348 6 | 7 | 8 | 1 9 | pFile:%String,pClass:%String,pSeparator:%String="," 10 | %Status 11 | WC") 25 | for i = 1:1:$l(tHeader,pSeparator) { 26 | set h = $p(tHeader,pSeparator,i) 27 | quit:h="" 28 | set tColumns($i(tColumns))=tProps($$$UPPER($tr(h,"_"))), 29 | tTypes(tColumns) = tPropTypes($$$UPPER($tr(h,"_"))) 30 | } 31 | 32 | while 'tFile.AtEnd { 33 | set tLine = $zstrip(tFile.ReadLine(),"<>WC") 34 | continue:tLine="" 35 | 36 | set tObj = $classmethod(pClass,"%New") 37 | for i=1:1:tColumns { 38 | set tValue = $piece(tLine,pSeparator,i) 39 | set:tTypes(i)="%Library.Date" tValue = $zdateh($tr(tValue,"/","-"),3) 40 | set $property(tObj, tColumns(i)) = tValue 41 | } 42 | set tSC = tObj.%Save() 43 | quit:$$$ISERR(tSC) 44 | set c = $i(c) 45 | } 46 | quit:$$$ISERR(tSC) 47 | 48 | write !,"Read ",c," records for table ",pClass 49 | 50 | } catch (ex) { 51 | set tSC = ex.AsStatus() 52 | } 53 | quit tSC 54 | ]]> 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /iris-aa-server/data/LoanPerformance.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %Persistent 5 | 65329,33578.231722 6 | 65268,55574.517277 7 | 8 | 9 | %Integer 10 | 11 | 12 | 13 | %Integer 14 | 15 | 16 | 17 | %Double 18 | 19 | 20 | 21 | %Integer 22 | 23 | 24 | 25 | %Integer 26 | 27 | 28 | 29 | %Integer 30 | 31 | 32 | 33 | %Integer 34 | 35 | 36 | 37 | %Date 38 | 39 | 40 | 41 | %String 42 | 43 | 44 | 45 | %Date 46 | 47 | 48 | 49 | %Integer 50 | 51 | 52 | 53 | %Integer 54 | 55 | 56 | 57 | %Boolean 58 | 59 | 60 | 61 | %Boolean 62 | 63 | 64 | 65 | %Boolean 66 | 67 | 68 | 69 | %Boolean 70 | 71 | 72 | 73 | %Boolean 74 | 75 | 76 | 77 | %Boolean 78 | 79 | 80 | 81 | %Integer 82 | 83 | 84 | 85 | %String 86 | 87 | 88 | 89 | 90 | %Integer 91 | 92 | 93 | 94 | %Integer 95 | 96 | 97 | 98 | %Integer 99 | 100 | 101 | 102 | %Integer 103 | 104 | 105 | 106 | %Integer 107 | 108 | 109 | 110 | %Integer 111 | 112 | 113 | 114 | %Integer 115 | 116 | 117 | 118 | %Integer 119 | 120 | 121 | 122 | %Integer 123 | 124 | 125 | 126 | %Integer 127 | 128 | 129 | 130 | %Integer 131 | 132 | 133 | 134 | %Integer 135 | 136 | 137 | 138 | %Integer 139 | 140 | 141 | 142 | %Integer 143 | 144 | 145 | 146 | %Integer 147 | 148 | 149 | 150 | %Integer 151 | 152 | 153 | 154 | %Integer 155 | 156 | 157 | 158 | %Integer 159 | 160 | 161 | 162 | %Integer 163 | 164 | 165 | 166 | %Boolean 167 | 168 | 169 | 170 | bitmap 171 | LoanDefault 172 | 173 | 174 | 175 | AssetCost 176 | 177 | 178 | 179 | 1 180 | csv:%String 181 | 194 | 195 | 196 | 197 | 1 198 | line:%String 199 | 245 | 246 | 247 | 248 | 1 249 | str 250 | %Integer 251 | 257 | 258 | 259 | 260 | 1 261 | str 262 | %Date 263 | 270 | 271 | 272 | 273 | %Storage.Persistent 274 | ^User.LoanPerformanceD 275 | LoanPerformanceDefaultData 276 | ^User.LoanPerformanceD 277 | ^User.LoanPerformanceI 278 | ^User.LoanPerformanceS 279 | sequence 280 | 281 | 282 | %%CLASSNAME 283 | 284 | 285 | DisbursedAmount 286 | 287 | 288 | AssetCost 289 | 290 | 291 | LTV 292 | 293 | 294 | BranchId 295 | 296 | 297 | SupplierId 298 | 299 | 300 | ManufacturerId 301 | 302 | 303 | CurrentPincodeId 304 | 305 | 306 | DateOfBirth 307 | 308 | 309 | EmploymentType 310 | 311 | 312 | DisbursalDate 313 | 314 | 315 | StateId 316 | 317 
| 318 | EmployeeCodeId 319 | 320 | 321 | MobileNumberAvailable 322 | 323 | 324 | Aadhar 325 | 326 | 327 | PAN 328 | 329 | 330 | Voter 331 | 332 | 333 | Driving 334 | 335 | 336 | Passport 337 | 338 | 339 | CNSScore 340 | 341 | 342 | PrimaryAccounts 343 | 344 | 345 | PrimaryAccountsActive 346 | 347 | 348 | PrimaryAccountsOverdue 349 | 350 | 351 | PrimaryCurrentBalance 352 | 353 | 354 | PrimarySanctionedAmount 355 | 356 | 357 | PrimaryDisbursedAmount 358 | 359 | 360 | SecondaryAccounts 361 | 362 | 363 | SecondaryAccoutsActive 364 | 365 | 366 | SecondaryAccountsOverdue 367 | 368 | 369 | SecondaryCurrentBalance 370 | 371 | 372 | SecondarySanctionedAmount 373 | 374 | 375 | SecondaryDisbursedAmount 376 | 377 | 378 | PrimaryInstallmentAmount 379 | 380 | 381 | SecondaryInstallmentAmount 382 | 383 | 384 | NewAccountsLastSixMonths 385 | 386 | 387 | DelinquentAccountsLastSixMonths 388 | 389 | 390 | AverageAccountAgeMonths 391 | 392 | 393 | CreditHistoryLengthMonths 394 | 395 | 396 | Inquiries 397 | 398 | 399 | LoanDefault 400 | 401 | 402 | CNSScoreDescription 403 | 404 | 405 | SecondaryAccountsActive 406 | 407 | 408 | 409 | 410 | 411 | -------------------------------------------------------------------------------- /iris-aa-server/data/NLPUtils.cls: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %IKInclude 5 | 65407,36216.674515 6 | 7 | 8 | 1 9 | pViewName:%String 10 | %Status 11 | Drop_NLP 12 | 1 13 | 33 | 34 | 35 | 36 | 1 37 | 38 | %Status 39 | Build_NLP 40 | 1 41 | [](){}/\|.;,:=?"), 155 | tPropNames(tEntUniId)=tPropName, 156 | tEntUniIds = tEntUniIds_","_tEntUniId 157 | 158 | if $d(tPropIndex(tPropName)) { 159 | // duplicate! - ignore for now, we'll just sum up frequencies 160 | } else { 161 | do AddColumn(tTableClass,tPropName,"Frequency for '"_tEntValue_"' (ID:"_tEntUniId_")") 162 | } 163 | set tPropIndex(tPropName,tEntUniId)="" 164 | } 165 | set tSC = tTableClass.%Save() 166 | quit:$$$ISERR(tSC) 167 | 168 | set tSC = $system.OBJ.Compile(tTableClass.Name,"ck"_$s(pVerbose:"d",1:"-d")) 169 | quit:$$$ISERR(tSC) 170 | 171 | write:pVerbose !,"Populating feature table" 172 | set tSC = tStatement.%Prepare("SELECT EntUniId, FrequencyAsConcept+FrequencyAsRelation Frequency FROM %iKnow_Objects.EntityInSourceDetails WHERE DomainId = "_tDomainId_" AND SourceId = ? AND EntUniId IN ("_$e(tEntUniIds,2,*)_")") 173 | quit:$$$ISERR(tSC) 174 | set tRS1 = ##class(%SQL.Statement).%ExecDirect(,"SELECT SourceId, LocalReference FROM %iKnow_Objects.Source where domainid = "_tDomainId) 175 | while tRS1.%Next() { 176 | set tRow = $classmethod(tTableClass.Name,"%New") 177 | set tRow.NLPID = tRS1.%Get("LocalReference") 178 | set tRS2 = tStatement.%Execute(tRS1.%Get("SourceId")) 179 | while tRS2.%Next() { 180 | set $property(tRow, tPropNames(tRS2.%Get("EntUniId"))) = $property(tRow, tPropNames(tRS2.%Get("EntUniId"))) + tRS2.%Get("Frequency") 181 | } 182 | set tSC = tRow.%Save() 183 | quit:$$$ISERR(tSC) 184 | } 185 | quit:$$$ISERR(tSC) 186 | 187 | write:pVerbose ! 188 | 189 | } catch (ex) { 190 | set tSC = ex.AsStatus() 191 | } 192 | if $$$ISERR(tSC) && pVerbose { 193 | write ! 194 | do $system.OBJ.DisplayError(tSC) 195 | write ! 
196 | } 197 | quit tSC 198 | 199 | AddColumn(cls, name, desc, type="%Integer") 200 | set prop = ##class(%Dictionary.PropertyDefinition).%New() 201 | set prop.parent = cls 202 | if '$zname(name,6) { 203 | set prop.SqlFieldName = name 204 | set prop.Name = $tr(name,"_- ""'+$=<>[](){}/\|.;,:=?") 205 | } else { 206 | set prop.Name = name 207 | } 208 | set prop.Description = desc 209 | set prop.Type = type 210 | quit 211 | 212 | AddParam(cls, name, value) 213 | set param = ##class(%Dictionary.ParameterDefinition).%New() 214 | set param.parent = cls 215 | set param.Name = name 216 | set param.Default = value 217 | ]]> 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /iris-aa-server/data/hate-speech.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/iris-aa-server/data/hate-speech.tar -------------------------------------------------------------------------------- /iris-aa-server/data/loans.gof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/iris-aa-server/data/loans.gof -------------------------------------------------------------------------------- /iris-aa-server/iris.script: -------------------------------------------------------------------------------- 1 | zn "%SYS" 2 | w ##class(Security.Users).UnExpireUserPasswords("*") 3 | 4 | zn "USER" 5 | zpm "install sslclient" 6 | zpm "install csvgen" 7 | zpm "install webterminal" 8 | 9 | // Load all the class definitions from the data directory 10 | do $system.OBJ.LoadDir("/data","cuk",,1) 11 | 12 | // Show how csvgen can load data from csv files locally or remotely 13 | do ##class(community.csvgen).Generate("/data/titanic.csv",",","Titanic.Passenger") 14 | do ##class(community.csvgen).GenerateFromURL("https://raw.githubusercontent.com/datasciencedojo/datasets/master/WorldDBTables/CountryTable.csv",",","SQLUser.Countries") 15 | do ##class(community.csvgen).Generate("/data/readmission.csv",",","Patient.Readmission") 16 | do ##class(community.csvgen).Generate("/data/Campaign.csv",";","Marketing.Campaign") 17 | do ##class(community.csvgen).Generate("/data/breast-cancer.csv",",","Biomedical.BreastCancer") 18 | 19 | // Load globals and build indices for the LoanPerformance table 20 | do $system.OBJ.Load("/data/loans.gof") 21 | do ##class(User.LoanPerformance).%BuildIndices() 22 | 23 | // do $system.OBJ.Load("/data/Loader.xml","cf") 24 | 25 | 26 | halt 27 | -------------------------------------------------------------------------------- /iris-aa-server/src/Util/Loader.cls: -------------------------------------------------------------------------------- 1 | Class Util.Loader 2 | { 3 | 4 | ClassMethod Load(pFile As %String, pClass As %String, pSeparator As %String = ",") As %Status 5 | { 6 | set tSC = $$$OK 7 | try { 8 | do $classmethod(pClass,"%KillExtent") 9 | set tClassDef = ##class(%Dictionary.ClassDefinition).%OpenId(pClass) 10 | for i=1:1:tClassDef.Properties.Count() { 11 | set tProp = tClassDef.Properties.GetAt(i) 12 | set tProps($$$UPPER(tProp.Name)) = tProp.Name 13 | set tPropTypes($$$UPPER(tProp.Name)) = tProp.Type 14 | } 15 | 16 | set tFile = ##class(%Stream.FileCharacter).%New() 17 | do tFile.LinkToFile(pFile) 18 | set tHeader = $zstrip(tFile.ReadLine(),"<>WC") 19 | for i = 
1:1:$l(tHeader,pSeparator) { 20 | set h = $p(tHeader,pSeparator,i) 21 | quit:h="" 22 | set tColumns($i(tColumns))=tProps($$$UPPER($tr(h,"_"))), 23 | tTypes(tColumns) = tPropTypes($$$UPPER($tr(h,"_"))) 24 | } 25 | 26 | while 'tFile.AtEnd { 27 | set tLine = $zstrip(tFile.ReadLine(),"<>WC") 28 | continue:tLine="" 29 | 30 | set tObj = $classmethod(pClass,"%New") 31 | for i=1:1:tColumns { 32 | set tValue = $piece(tLine,pSeparator,i) 33 | set:tTypes(i)="%Library.Date" tValue = $zdateh($tr(tValue,"/","-"),3) 34 | set $property(tObj, tColumns(i)) = tValue 35 | } 36 | set tSC = tObj.%Save() 37 | quit:$$$ISERR(tSC) 38 | set c = $i(c) 39 | } 40 | quit:$$$ISERR(tSC) 41 | 42 | write !,"Read ",c," records for table ",pClass,! 43 | 44 | } catch (ex) { 45 | set tSC = ex.AsStatus() 46 | } 47 | quit tSC 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /jupyter-samples/ED_visit_90_day.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train an IntegratedML model on ED Readmit likelihood Dataset\n", 8 | "## Use JDBC to connect to InterSystems IRIS database\n", 9 | "**NOTE: This Notebook will not run as-is!**\n", 10 | "This Notebook demonstrates:\n", 11 | "- Using the JayDeBeApi Python library to connect to InterSystems IRIS\n", 12 | "- Creating views to segment data into training and test sets\n", 13 | "- Defining and training an IntegratedML model to predict ED Readmits in the next 90 days\n", 14 | "- Comparing the resulting model's predictions to data in the test set (that the model was not trained on)\n", 15 | "- Using the IntegratedML \"VALIDATE MODEL\" command to calculate accuracy metrics on the test set data" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 1. Get jdbc connection and cursor" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | <<<<<<< HEAD 28 | "execution_count": null, 29 | ======= 30 | "execution_count": 1, 31 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pandas as pd" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | <<<<<<< HEAD 41 | "execution_count": null, 42 | ======= 43 | "execution_count": 2, 44 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "%run -i '../Initializations/Conns.py'" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### 2. Create and specify the source data table(s)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | <<<<<<< HEAD 61 | "execution_count": null, 62 | ======= 63 | "execution_count": 3, 64 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "#Use this block to create a starting data set that you can/will build upon.\n", 69 | "#NOTE: It is always useful to have a unique identifier in the data\n", 70 | "TargetTable = 'Data.EDEncsPredB90View'\n", 71 | "TrainTable = 'Data.EDEncsTraining'\n", 72 | "TestTable = 'Data.EDEncsTesting'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "### 3. Review the data to ensure the Target variable and Independent variables are in good standing." 
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | <<<<<<< HEAD 85 | "execution_count": null, 86 | "metadata": { 87 | "scrolled": true 88 | }, 89 | "outputs": [], 90 | ======= 91 | "execution_count": 4, 92 | "metadata": { 93 | "scrolled": true 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | " ID DRID HPS Age Gend Mar InsRel PlanCode T30 T60 ... S180 S365 \\\n", 101 | "0 1 1 4.0 52.0 F M S L05 0.0 0.0 ... 53.0 87.0 \n", 102 | "1 2 2 2.0 48.0 F M 1 13947 0.0 0.0 ... 0.0 1.0 \n", 103 | "2 3 3 1.0 66.0 F M S W01 NaN NaN ... NaN NaN \n", 104 | "3 4 4 NaN 62.0 M D S S20 NaN NaN ... NaN NaN \n", 105 | "4 5 5 NaN 51.0 M S S W01 0.0 1.0 ... NaN NaN \n", 106 | "\n", 107 | " Gen dGen Med dMed Appt dAppt Rx dRx \n", 108 | "0 9.0 7.0 9.0 3.0 0.0 0.0 2.0 2.0 \n", 109 | "1 NaN NaN NaN NaN NaN NaN NaN NaN \n", 110 | "2 NaN NaN NaN NaN NaN NaN NaN NaN \n", 111 | "3 NaN NaN NaN NaN NaN NaN NaN NaN \n", 112 | "4 NaN NaN NaN NaN NaN NaN NaN NaN \n", 113 | "\n", 114 | "[5 rows x 75 columns]\n", 115 | "Index(['ID', 'DRID', 'HPS', 'Age', 'Gend', 'Mar', 'InsRel', 'PlanCode', 'T30',\n", 116 | " 'T60', 'T90', 'E30', 'E90', 'E180', 'E365', 'I180', 'I365', 'O30',\n", 117 | " 'O90', 'O180', 'O365', 'ObsHt', 'dObsHt', 'ObsWt', 'dObsWt', 'ObsBMI',\n", 118 | " 'dObsBMI', 'ObsBSA', 'dObsBSA', 'ObsTemp', 'dObsTemp', 'ObsPulse',\n", 119 | " 'dObsPulse', 'ObsBPS', 'dObsBPS', 'ObsBPD', 'dObsBPD', 'ObsO2',\n", 120 | " 'dObsO2', 'Labs', 'dLabs', 'Meds', 'dMeds', 'Rads', 'dRads', 'Vax',\n", 121 | " 'dVax', 'HypChol_E78', 'dHypChol_E78', 'TII_E11', 'dTII_E11',\n", 122 | " 'GenEnc_Z00', 'dGenEnc_Z00', 'Scrn_Z13', 'dScrn_Z13', 'Couns_Z71',\n", 123 | " 'dCouns_Z71', 'OWt_E66', 'dOWt_E66', 'HypThy_E03', 'dHypThy_E03',\n", 124 | " 'Scrn_Z12', 'dScrn_Z12', 'S30', 'S90', 'S180', 'S365', 'Gen', 'dGen',\n", 125 | " 'Med', 'dMed', 'Appt', 'dAppt', 'Rx', 'dRx'],\n", 126 | " dtype='object')\n" 127 | ] 128 | } 129 | ], 130 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 131 | "source": [ 132 | "tKeep()\n", 133 | "import pandas as pd\n", 134 | "from IPython.display import display\n", 135 | "\n", 136 | <<<<<<< HEAD 137 | "df = pd.read_sql(\"select top 3 * from Data.PatientCostData\", iconn)\n", 138 | ======= 139 | "df = pd.read_sql(\"select top 5 * from Data.PatientCostData\", iconn)\n", 140 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 141 | "print(df)\n", 142 | "print(df.columns)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Drop and unwanted fields" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | <<<<<<< HEAD 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | ======= 159 | "execution_count": 5, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "'ID,DRID,HPS,Age,Gend,Mar,InsRel,PlanCode,E30,E90,E180,E365,I180,I365,O30,O90,O180,O365,ObsHt,dObsHt,ObsWt,dObsWt,ObsBMI,dObsBMI,ObsBSA,dObsBSA,ObsTemp,dObsTemp,ObsPulse,dObsPulse,ObsBPS,dObsBPS,ObsBPD,dObsBPD,ObsO2,dObsO2,Labs,dLabs,Meds,dMeds,Rads,dRads,Vax,dVax,HypChol_E78,dHypChol_E78,TII_E11,dTII_E11,GenEnc_Z00,dGenEnc_Z00,Scrn_Z13,dScrn_Z13,Couns_Z71,dCouns_Z71,OWt_E66,dOWt_E66,HypThy_E03,dHypThy_E03,Scrn_Z12,dScrn_Z12,S30,S90,S180,S365,Gen,dGen,Med,dMed,Appt,dAppt,Rx,dRx'" 166 | ] 167 | }, 168 | "execution_count": 5, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 174 | "source": [ 175 | "df = 
df.drop(['T30', 'T60', 'T90'], axis = 1)\n", 176 | "Usable = str(list(df.columns)).replace(\"', '\", \",\")[2:-2]\n", 177 | "Usable" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | <<<<<<< HEAD 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | ======= 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/html": [ 193 | "
" 222 | ], 223 | "text/plain": [ 224 | " Recs\n", 225 | "0 126633" 226 | ] 227 | }, 228 | "metadata": {}, 229 | "output_type": "display_data" 230 | } 231 | ], 232 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 233 | "source": [ 234 | "icurs.execute(' \\\n", 235 | " create or replace view %s as \\\n", 236 | " select case when t90 > 0 then 1 else 0 end as B90, %s \\\n", 237 | " from Data.PatientCostData' % (TargetTable, Usable))\n", 238 | "df1 = pd.read_sql('SELECT COUNT(*) as Recs FROM %s' % TargetTable, iconn)\n", 239 | "TargetVar = 'B90'\n", 240 | "display(df1)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | <<<<<<< HEAD 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | ======= 250 | "execution_count": 7, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/html": [ 256 | "
" 292 | ], 293 | "text/plain": [ 294 | " B90 Recs\n", 295 | "0 0 120181\n", 296 | "1 1 6452" 297 | ] 298 | }, 299 | "execution_count": 7, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 305 | "source": [ 306 | "Distro = pd.read_sql('select %s, count(*) as Recs from %s group by %s' % (TargetVar, TargetTable, TargetVar), iconn)\n", 307 | "Distro" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "### 4. Assess the probability of your target and sample accordingly into split training and testing datasets" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | <<<<<<< HEAD 320 | "execution_count": null, 321 | ======= 322 | "execution_count": 8, 323 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "#we want to split the data into Training (80%) and Test (20%), ...\n", 328 | "# but also reduce the ratio of Negative (ED Enc = 0) to Positive\n", 329 | "Train = 0.8\n", 330 | "TVRatio = 2\n", 331 | "PT_List = pd.read_sql('select DRID, %s from %s order by %s, DRID' % (TargetVar, TargetTable, TargetVar), iconn)\n", 332 | "PT_List.index = PT_List['DRID']" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | <<<<<<< HEAD 338 | "execution_count": null, 339 | "metadata": { 340 | "scrolled": true 341 | }, 342 | "outputs": [], 343 | ======= 344 | "execution_count": 9, 345 | "metadata": { 346 | "scrolled": true 347 | }, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | " DRID\n", 354 | "B90 \n", 355 | "0 10323\n", 356 | "1 5161\n", 357 | " DRID\n", 358 | "B90 \n", 359 | "0 27787\n", 360 | "1 1291\n" 361 | ] 362 | } 363 | ], 364 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 365 | "source": [ 366 | "#Create the lists, check the ratios, and create the \"In lists\":\n", 367 | "TrainList = PT_List[PT_List[TargetVar] == 0].sample(int(Distro['Recs'].loc[1]*TVRatio*Train)) \\\n", 368 | " .append(PT_List[PT_List[TargetVar] == 1].sample(int(Distro['Recs'].loc[1]*Train)))\n", 369 | "TrainList['Flag'] = 1\n", 370 | "TrainList.index = TrainList['DRID']\n", 371 | "print(TrainList.pivot_table(index = TargetVar, values = 'DRID', aggfunc = 'count'))\n", 372 | "#NOTE: It is IMPERATIVE that Test does NOT contain any Train data\n", 373 | "TestList = PT_List.join(TrainList['Flag'], how = 'left')\n", 374 | "TestList = TestList[(TestList['Flag'] != 1)]\n", 375 | "TestList = TestList[(TestList[TargetVar] == 1)].append(TestList[TestList[TargetVar] == 0].sample(int(len(TestList)*0.25)))\n", 376 | "print(TestList.pivot_table(index = TargetVar, values = 'DRID', aggfunc = 'count'))\n", 377 | "TrainIns = str(list(TrainList['DRID']))[1:-1]\n", 378 | "TestIns = str(list(TestList['DRID']))[1:-1]" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | <<<<<<< HEAD 384 | "execution_count": null, 385 | ======= 386 | "execution_count": 10, 387 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "# Training set view\n", 392 | "icurs.execute(''' \\\n", 393 | " CREATE or replace VIEW %s AS \\\n", 394 | " SELECT * FROM %s \n", 395 | " WHERE DRID in (%s)''' \\\n", 396 | " % (TrainTable, TargetTable, TrainIns))\n", 397 | "# Prediction set\n", 398 | "icurs.execute(''' \\\n", 399 | " CREATE or replace VIEW %s AS \\\n", 400 | " SELECT * FROM %s \n", 401 | " WHERE DRID in (%s)''' \\\n", 402 | " % (TestTable, 
TargetTable, TestIns))" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "### 6. Create and Train an IntegratedML Model using default settings\n", 410 | "IntegratedML only needs a model name, the name of the column that is the target column to predict, and a table (or SELECT query to specify input columns." 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | <<<<<<< HEAD 416 | "execution_count": null, 417 | ======= 418 | "execution_count": 11, 419 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "try:\n", 424 | " icurs.execute(\"CREATE MODEL NewEncModel PREDICTING (%s) FROM %s\" % (TargetVar, TrainTable))\n", 425 | "except:\n", 426 | " icurs.execute(\"DROP MODEL NewEncModel\")\n", 427 | " icurs.execute(\"CREATE MODEL NewEncModel PREDICTING (%s) FROM %s\" % (TargetVar, TrainTable))" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": {}, 433 | "source": [ 434 | "Now that the model is defined, you can TRAIN it, which invokes the AutoML machine learning procedure." 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | <<<<<<< HEAD 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | ======= 444 | "execution_count": 15, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "ename": "DatabaseError", 449 | "evalue": "java.sql.SQLException: [SQLCODE: <-400>:]\r\n[Location: ]\r\n[%msg: ]", 450 | "output_type": "error", 451 | "traceback": [ 452 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 453 | "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", 454 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.execute\u001b[0;34m()\u001b[0m\n", 455 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.Update\u001b[0;34m()\u001b[0m\n", 456 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 457 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 458 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 459 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.processError\u001b[0;34m()\u001b[0m\n", 460 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.getServerError\u001b[0;34m()\u001b[0m\n", 461 | "\u001b[0;31mException\u001b[0m: Java Exception", 462 | "\nThe above exception was the direct cause of the following exception:\n", 463 | "\u001b[0;31mjava.sql.SQLException\u001b[0m Traceback (most recent call last)", 464 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 533\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m \u001b[0mis_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 535\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 465 | "\u001b[0;31mjava.sql.SQLException\u001b[0m: java.sql.SQLException: [SQLCODE: <-400>:]\r\n[Location: ]\r\n[%msg: ]", 466 | "\nDuring handling of the above exception, another exception occurred:\n", 467 | "\u001b[0;31mDatabaseError\u001b[0m Traceback (most recent call last)", 468 | "\u001b[0;32m~/Initializations/Conns.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m#icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_Auto\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0micurs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"set ml configuration DRCfg\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0micurs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"TRAIN MODEL NewEncModel as NewEncModel_DR\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 469 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[0mis_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 536\u001b[0;31m \u001b[0m_handle_sql_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 537\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_rs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetResultSet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 470 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36m_handle_sql_exception_jpype\u001b[0;34m()\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0mexc_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mInterfaceError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m \u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mexc_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 166\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_jdbc_connect_jpype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjclassname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdriver_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjars\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 471 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 472 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_stmt_parms\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m \u001b[0mis_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 535\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0m_handle_sql_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 473 | "\u001b[0;31mDatabaseError\u001b[0m: java.sql.SQLException: [SQLCODE: <-400>:]\r\n[Location: ]\r\n[%msg: ]" 474 | ] 475 | } 476 | ], 477 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 478 | "source": [ 479 | "icurs.execute(\"set ml configuration %AutoML\")\n", 480 | "icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_Auto\")\n", 481 | "icurs.execute(\"set ml configuration DRCfg\")\n", 482 | "icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_DR\")" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "Once that finishes, you can see some information about the model in the \"ML_TRAINED_MODELS\" table." 
490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | <<<<<<< HEAD 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | ======= 499 | "execution_count": 16, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/html": [ 505 | "
" 553 | ], 554 | "text/plain": [ 555 | " MODEL_NAME TRAINED_MODEL_NAME PROVIDER TRAINED_TIMESTAMP \\\n", 556 | "0 NoShowModel NoShowModel_t2 AutoML 2020-10-26 15:57:18.731000 \n", 557 | "1 NewEncModelTD NewEncModelAutoML AutoML 2020-10-27 01:46:39.459000 \n", 558 | "\n", 559 | " MODEL_TYPE MODEL_INFO \n", 560 | "0 classification ModelType:Random Forest, Package:sklearn, Prob... \n", 561 | "1 classification ModelType:Logistic Regression, Package:sklearn... " 562 | ] 563 | }, 564 | "execution_count": 16, 565 | "metadata": {}, 566 | "output_type": "execute_result" 567 | } 568 | ], 569 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 570 | "source": [ 571 | "pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_TRAINED_MODELS\", iconn)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "markdown", 576 | "metadata": {}, 577 | "source": [ 578 | "### 7. Compare model output to data it has not seen yet\n", 579 | "Now you can use SQL to SELECT data from another table, run the IntegratedML model on this new data, and see how well the predictions match the data!" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | <<<<<<< HEAD 585 | "execution_count": null, 586 | "metadata": {}, 587 | "outputs": [], 588 | "source": [ 589 | "TestSet = pd.read_sql('''\n", 590 | " SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs,\n", 591 | " case when B90 = 1 then 1 end AS ActualPos,\n", 592 | " case when B90 != 1 then 0 end AS ActualNeg\n", 593 | " FROM %s''' % (TestTable), iconn)" 594 | ======= 595 | "execution_count": 14, 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | "ename": "DatabaseError", 600 | "evalue": "Execution failed on sql 'SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs, case when B90 = 1 then 1 end AS ActualPos, case when B90 != 1 then 0 end AS ActualNeg FROM Data.EDEncsTesting': java.sql.SQLException: [SQLCODE: <-181>:]\r\n[Location: ]\r\n[%msg: < Model 'NewEncModel_Auto' not found>]", 601 | "output_type": "error", 602 | "traceback": [ 603 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 604 | "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", 605 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.prepareStatement\u001b[0;34m()\u001b[0m\n", 606 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.prepareStatement\u001b[0;34m()\u001b[0m\n", 607 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.getOrCreatePossiblyShardedIRISPreparedStatement\u001b[0;34m()\u001b[0m\n", 608 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.\u001b[0;34m()\u001b[0m\n", 609 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.prepare\u001b[0;34m()\u001b[0m\n", 610 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.prepareInternal\u001b[0;34m()\u001b[0m\n", 611 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in 
\u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 612 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 613 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 614 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.processError\u001b[0;34m()\u001b[0m\n", 615 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.getServerError\u001b[0;34m()\u001b[0m\n", 616 | "\u001b[0;31mException\u001b[0m: Java Exception", 617 | "\nThe above exception was the direct cause of the following exception:\n", 618 | "\u001b[0;31mjava.sql.SQLException\u001b[0m Traceback (most recent call last)", 619 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1680\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1681\u001b[0;31m \u001b[0mcur\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1682\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcur\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 620 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_close_last\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 531\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprepareStatement\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moperation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 532\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_stmt_parms\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 621 | "\u001b[0;31mjava.sql.SQLException\u001b[0m: java.sql.SQLException: [SQLCODE: <-181>:]\r\n[Location: ]\r\n[%msg: < Model 'NewEncModel_Auto' not found>]", 622 | "\nThe above exception was the direct cause of the following exception:\n", 623 | "\u001b[0;31mDatabaseError\u001b[0m Traceback (most recent call last)", 624 | "\u001b[0;32m~/Initializations/Conns.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcase\u001b[0m \u001b[0mwhen\u001b[0m \u001b[0mB90\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0mthen\u001b[0m \u001b[0;36m1\u001b[0m 
\u001b[0mend\u001b[0m \u001b[0mAS\u001b[0m \u001b[0mActualPos\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mcase\u001b[0m \u001b[0mwhen\u001b[0m \u001b[0mB90\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0mthen\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0mend\u001b[0m \u001b[0mAS\u001b[0m \u001b[0mActualNeg\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m FROM %s\" % (TestTable), iconn)\n\u001b[0m", 625 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mread_sql\u001b[0;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mcoerce_float\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoerce_float\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0mparse_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparse_dates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 489\u001b[0;31m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunksize\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 490\u001b[0m )\n\u001b[1;32m 491\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 626 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mread_query\u001b[0;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize)\u001b[0m\n\u001b[1;32m 1725\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1726\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_convert_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1727\u001b[0;31m \u001b[0mcursor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1728\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mcol_desc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol_desc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcursor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdescription\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1729\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 627 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1691\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1692\u001b[0m \u001b[0mex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDatabaseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Execution failed on sql '{args[0]}': {exc}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1693\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mex\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1694\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1695\u001b[0m 
\u001b[0;34m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 628 | "\u001b[0;31mDatabaseError\u001b[0m: Execution failed on sql 'SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs, case when B90 = 1 then 1 end AS ActualPos, case when B90 != 1 then 0 end AS ActualNeg FROM Data.EDEncsTesting': java.sql.SQLException: [SQLCODE: <-181>:]\r\n[Location: ]\r\n[%msg: < Model 'NewEncModel_Auto' not found>]" 629 | ] 630 | } 631 | ], 632 | "source": [ 633 | "TestSet = pd.read_sql(\"SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs, \\\n", 634 | " case when B90 = 1 then 1 end AS ActualPos, \\\n", 635 | " case when B90 != 1 then 0 end AS ActualNeg \\\n", 636 | " FROM %s\" % (TestTable), iconn)" 637 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": null, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "print(pd.pivot_table(TestSet, index = 'PredictedEncs', values = ['ActualPos', 'ActualNeg'], aggfunc = 'count'))\n", 647 | "print('Accuracy: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == TestSet['ActualPos']) \\\n", 648 | " | (TestSet['PredictedEncs'] == TestSet['ActualNeg'])])/len(TestSet))*100))+'%')\n", 649 | "print('Misclassification Rate: '+str(round((len(TestSet[(TestSet['PredictedEncs'] != TestSet['ActualPos']) \\\n", 650 | " & (TestSet['PredictedEncs'] != TestSet['ActualNeg'])])/len(TestSet))*100))+'%')\n", 651 | "print('%FP: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == 1) & (TestSet['ActualNeg'] == 0)])/ \\\n", 652 | " len(TestSet[TestSet['ActualNeg'] == 0]))*100))+'%')\n", 653 | "print('%FN: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == 0) & (TestSet['ActualPos'] == 1)])/ \\\n", 654 | " len(TestSet[TestSet['ActualPos'] == 1]))*100))+'%')" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "### 8. VALIDATE MODEL command calculates accuracy metrics\n", 662 | "You can certainly take that output above and calculate the accuracy using a standard formula, but IntegratedML has a built-in function to do that!\n", 663 | "\n", 664 | "Each time you run the command \"VALIDATE MODEL...\" it generates a set of metrics calculated on the data passed into the query. Since this table can be a bit difficult to read in its raw form we use a simple \"pivot\" call to arrange the data." 
665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": {}, 671 | "outputs": [], 672 | "source": [ 674 | "icurs.execute(\"VALIDATE model NewEncModel use NewEncModel_Auto FROM Data.EDEncsTesting\")\n", 678 | "df5 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS\", iconn)\n", 680 | "df6 = df5.pivot(index='VALIDATION_RUN_NAME', columns='METRIC_NAME', values='METRIC_VALUE')\n", 681 | "display(df6)" 682 | ] 683 | } 684 | ], 685 | "metadata": { 686 | "kernelspec": { 687 | "display_name": "Python 3", 688 | "language": "python", 689 | "name": "python3" 690 | }, 691 | "language_info": { 692 | "codemirror_mode": { 693 | "name": "ipython", 694 | "version": 3 695 | }, 696 | "file_extension": ".py", 697 | "mimetype": "text/x-python", 698 | "name": "python", 699 | "nbconvert_exporter": "python", 700 | "pygments_lexer": "ipython3", 701 | "version": "3.6.8" 702 | } 703 | }, 704 | "nbformat": 4, 705 | "nbformat_minor": 4 706 | } 707 | -------------------------------------------------------------------------------- /jupyter-samples/biomedical-integratedml-PyODBC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# IntegratedML applied to biomedical data, using PyODBC\n", 8 | "This notebook demonstrates the following:\n", 9 | "- Connecting to InterSystems IRIS via a PyODBC connection\n", 10 | "- Creating, Training and Executing (PREDICT() function) an IntegratedML machine learning model, applied to breast cancer tumor diagnoses\n", 11 | "- INSERTING machine learning predictions into a new SQL table\n", 12 | "- Executing a relatively complex SQL query containing IntegratedML PREDICT() and PROBABILITY() functions, and flexibly using the results to filter and sort the output" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### ODBC and pyODBC Resources\n", 20 | "Often, connecting to a database is more than half the battle when developing SQL-heavy applications, especially if you are not familiar with the tools, or more importantly the particular database system.
If this is the case, and you are just getting started using PyODBC and InterSystems IRIS, this notebook and these resources below may help you get up to speed!\n", 21 | "\n", 22 | "https://gettingstarted.intersystems.com/development-setup/odbc-connections/\n", 23 | "\n", 24 | "https://irisdocs.intersystems.com/irislatest/csp/docbook/DocBook.UI.Page.cls?KEY=BNETODBC_support#BNETODBC_support_pyodbc\n", 25 | "\n", 26 | "https://stackoverflow.com/questions/46405777/connect-docker-python-to-sql-server-with-pyodbc\n", 27 | "\n", 28 | "https://stackoverflow.com/questions/44527452/cant-open-lib-odbc-driver-13-for-sql-server-sym-linking-issue" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "" 40 | ], 41 | "text/plain": [ 42 | "" 43 | ] 44 | }, 45 | "metadata": {}, 46 | "output_type": "display_data" 47 | } 48 | ], 49 | "source": [ 50 | "# make the notebook full screen\n", 51 | "from IPython.core.display import display, HTML\n", 52 | "display(HTML(\"\"))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### 1. Install system packages for ODBC" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "Hit:1 http://archive.ubuntu.com/ubuntu bionic InRelease\n", 72 | "Get:2 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", 73 | "Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB] \n", 74 | "Get:4 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB] \n", 75 | "Fetched 252 kB in 1s (322 kB/s) \n", 76 | "Reading package lists... Done\n", 77 | "Reading package lists... Done\n", 78 | "Building dependency tree \n", 79 | "Reading state information... Done\n", 80 | "gcc is already the newest version (4:7.4.0-1ubuntu2.3).\n", 81 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n", 82 | "Reading package lists... Done\n", 83 | "Building dependency tree \n", 84 | "Reading state information... Done\n", 85 | "unixodbc-dev is already the newest version (2.3.4-1.1ubuntu3).\n", 86 | "tdsodbc is already the newest version (1.00.82-2ubuntu0.1).\n", 87 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n", 88 | "Reading package lists... Done\n", 89 | "Building dependency tree \n", 90 | "Reading state information... 
Done\n", 91 | "unixodbc-bin is already the newest version (2.3.0-4build1).\n", 92 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "!apt-get update\n", 98 | "!apt-get install gcc\n", 99 | "!apt-get install -y tdsodbc unixodbc-dev\n", 100 | "!apt install unixodbc-bin -y\n", 101 | "!apt-get clean " 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "#### Use this command to troubleshoot a failed pyodbc installation:\n", 109 | "!pip install --upgrade --global-option=build_ext --global-option=\"-I/usr/local/include\" --global-option=\"-L/usr/local/lib\" pyodbc" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "Requirement already satisfied: pyodbc in /usr/local/lib/python3.6/dist-packages (4.0.32)\n", 122 | "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "!pip install pyodbc" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 4, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "!rm /etc/odbcinst.ini\n", 137 | "!rm /etc/odbc.ini" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 5, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "!ln -s /tf/odbcinst.ini /etc/odbcinst.ini\n", 147 | "!ln -s /tf/odbc.ini /etc/odbc.ini" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "[InterSystems ODBC35]\r\n", 160 | "UsageCount=1\r\n", 161 | "Driver=/tf/libirisodbcu35.so\r\n", 162 | "Setup=/tf/libirisodbcu35.so\r\n", 163 | "SQLLevel=1\r\n", 164 | "FileUsage=0\r\n", 165 | "DriverODBCVer=02.10\r\n", 166 | "ConnectFunctions=YYN\r\n", 167 | "APILevel=1\r\n", 168 | "DEBUG=1\r\n", 169 | "CPTimeout=\r\n", 170 | "\r\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "!cat /tf/odbcinst.ini" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 7, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "[user]\r\n", 188 | "Driver=InterSystems ODBC35\r\n", 189 | "Protocol=TCP\r\n", 190 | "Host=irisimlsvr\r\n", 191 | "Port=51773\r\n", 192 | "Namespace=USER\r\n", 193 | "UID=SUPERUSER\r\n", 194 | "Password=SYS\r\n", 195 | "Description=Sample namespace\r\n", 196 | "Query Timeout=0\r\n", 197 | "Static Cursors=0\r\n", 198 | "\r\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "!cat /tf/odbc.ini" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 8, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "unixODBC 2.3.4\r\n", 216 | "DRIVERS............: /etc/odbcinst.ini\r\n", 217 | "SYSTEM DATA SOURCES: /etc/odbc.ini\r\n", 218 | "FILE DATA SOURCES..: /etc/ODBCDataSources\r\n", 219 | "USER DATA SOURCES..: /root/.odbc.ini\r\n", 220 | "SQLULEN Size.......: 8\r\n", 221 | "SQLLEN Size........: 8\r\n", 222 | "SQLSETPOSIROW Size.: 8\r\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | 
"!odbcinst -j" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### 2. Verify you see \"InterSystems ODBC35\" in the drivers list" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 9, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "['InterSystems ODBC35']\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "import pyodbc\n", 252 | "print(pyodbc.drivers())" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "### 3. Get an ODBC connection " 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 10, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "import pyodbc \n", 269 | "import time\n", 270 | "\n", 271 | "\n", 272 | "#input(\"Hit any key to start\")\n", 273 | "dsn = 'IRIS IntegratedML demo via PyODBC'\n", 274 | "server = 'irisimlsvr' #'192.168.99.101' \n", 275 | "port = '1972' #'9091'\n", 276 | "database = 'USER' \n", 277 | "username = 'SUPERUSER' \n", 278 | "password = 'SYS' \n", 279 | "cnxn = pyodbc.connect('DRIVER={InterSystems ODBC35};SERVER='+server+';PORT='+port+';DATABASE='+database+';UID='+username+';PWD='+ password)\n", 280 | "\n", 281 | "### Ensure it read strings correctly.\n", 282 | "cnxn.setdecoding(pyodbc.SQL_CHAR, encoding='utf8')\n", 283 | "cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf8')\n", 284 | "cnxn.setencoding(encoding='utf8')" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### 4. Get a cursor; start the timer" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 11, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "cursor = cnxn.cursor()\n", 301 | "start= time.clock()" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "### 5. Specify the training data, and give a model name" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 12, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "dataTable = 'Biomedical.BreastCancer'\n", 318 | "dataTablePredict = 'Result02'\n", 319 | "dataColumn = 'Diagnosis'\n", 320 | "dataColumnPredict = \"PredictedDiagnosis\"\n", 321 | "modelName = \"bc\" #chose a name - must be unique in server end" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | " ### Cleaning before retrying" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 13, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "#If we re-run the notebook just drop model and table\n", 338 | "#cursor.execute(\"DROP MODEL %s\" % modelName)\n", 339 | "#cursor.execute(\"DROP TABLE %s\" % dataTablePredict)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### 6. 
Train and predict" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 14, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "cursor.execute(\"CREATE MODEL %s PREDICTING (%s) FROM %s\" % (modelName, dataColumn, dataTable))\n", 356 | "cursor.execute(\"TRAIN MODEL %s FROM %s\" % (modelName, dataTable))\n", 357 | "cursor.execute(\"Create Table %s (%s VARCHAR(100), %s VARCHAR(100))\" % (dataTablePredict, dataColumnPredict, dataColumn))\n", 358 | "cursor.execute(\"INSERT INTO %s SELECT TOP 20 PREDICT(%s) AS %s, %s FROM %s\" % (dataTablePredict, modelName, dataColumnPredict, dataColumn, dataTable)) \n", 359 | "cnxn.commit()" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "### 7. Show the predict result" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 15, 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/html": [ 377 | "
\n", 378 | "\n", 391 | "\n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | "
PredictedDiagnosisDiagnosis
0MM
1MM
2MM
3MM
4MM
5MM
6MM
7MM
8MM
9MM
10MM
11MM
12MM
13MM
14MM
15MM
16MM
17MM
18MM
19BB
\n", 502 | "
" 503 | ], 504 | "text/plain": [ 505 | " PredictedDiagnosis Diagnosis\n", 506 | "0 M M\n", 507 | "1 M M\n", 508 | "2 M M\n", 509 | "3 M M\n", 510 | "4 M M\n", 511 | "5 M M\n", 512 | "6 M M\n", 513 | "7 M M\n", 514 | "8 M M\n", 515 | "9 M M\n", 516 | "10 M M\n", 517 | "11 M M\n", 518 | "12 M M\n", 519 | "13 M M\n", 520 | "14 M M\n", 521 | "15 M M\n", 522 | "16 M M\n", 523 | "17 M M\n", 524 | "18 M M\n", 525 | "19 B B" 526 | ] 527 | }, 528 | "metadata": {}, 529 | "output_type": "display_data" 530 | } 531 | ], 532 | "source": [ 533 | "import pandas as pd\n", 534 | "from IPython.display import display\n", 535 | "\n", 536 | "df1 = pd.read_sql(\"SELECT * from %s ORDER BY ID\" % dataTablePredict, cnxn)\n", 537 | "display(df1)" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "### 8. Show a complicated query\n", 545 | "IntegratedML function PREDICT() and PROBABILITY() can appear virtually anywhere in a SQL query, for maximal flexibility!\n", 546 | "Below we are SELECTing columns as well as the result of the PROBABILITY function, and then filtering on the result of the PREDICT function. To top it off, ORDER BY is using the output of PROBSBILITY for sorting." 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 16, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "data": { 556 | "text/html": [ 557 | "
\n", 558 | "\n", 571 | "\n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | "
IDProbabilitydiagnosis
0740.508227M
12980.675269M
22160.863261M
3420.955022M
41470.961170M
51010.994392M
6450.995220M
760.995779M
8400.996360M
91940.998938M
1080.999320M
111270.999456M
125020.999470M
131720.999546M
14150.999659M
15440.999668M
164360.999765M
171060.999839M
18480.999894M
192300.999894M
2090.999991M
215130.999994M
221970.999995M
23650.999995M
24320.999999M
2541.000000M
263801.000000M
27101.000000M
282041.000000M
\n", 757 | "
" 758 | ], 759 | "text/plain": [ 760 | " ID Probability diagnosis\n", 761 | "0 74 0.508227 M\n", 762 | "1 298 0.675269 M\n", 763 | "2 216 0.863261 M\n", 764 | "3 42 0.955022 M\n", 765 | "4 147 0.961170 M\n", 766 | "5 101 0.994392 M\n", 767 | "6 45 0.995220 M\n", 768 | "7 6 0.995779 M\n", 769 | "8 40 0.996360 M\n", 770 | "9 194 0.998938 M\n", 771 | "10 8 0.999320 M\n", 772 | "11 127 0.999456 M\n", 773 | "12 502 0.999470 M\n", 774 | "13 172 0.999546 M\n", 775 | "14 15 0.999659 M\n", 776 | "15 44 0.999668 M\n", 777 | "16 436 0.999765 M\n", 778 | "17 106 0.999839 M\n", 779 | "18 48 0.999894 M\n", 780 | "19 230 0.999894 M\n", 781 | "20 9 0.999991 M\n", 782 | "21 513 0.999994 M\n", 783 | "22 197 0.999995 M\n", 784 | "23 65 0.999995 M\n", 785 | "24 32 0.999999 M\n", 786 | "25 4 1.000000 M\n", 787 | "26 380 1.000000 M\n", 788 | "27 10 1.000000 M\n", 789 | "28 204 1.000000 M" 790 | ] 791 | }, 792 | "metadata": {}, 793 | "output_type": "display_data" 794 | } 795 | ], 796 | "source": [ 797 | "df2 = pd.read_sql(\"SELECT ID, PROBABILITY(bc FOR 'M') AS Probability, Diagnosis FROM %s \\\n", 798 | " WHERE Mean_Area BETWEEN 300 AND 600 AND Mean_Radius > 5 AND PREDICT(%s) = 'M' \\\n", 799 | " ORDER BY Probability\" % (dataTable, modelName),cnxn) \n", 800 | "display(df2)" 801 | ] 802 | }, 803 | { 804 | "cell_type": "markdown", 805 | "metadata": {}, 806 | "source": [ 807 | "### 9. Close and clean " 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 17, 813 | "metadata": {}, 814 | "outputs": [ 815 | { 816 | "name": "stdout", 817 | "output_type": "stream", 818 | "text": [ 819 | "Total elapsed time: \n", 820 | "0.731681\n" 821 | ] 822 | } 823 | ], 824 | "source": [ 825 | "cnxn.close()\n", 826 | "end= time.clock()\n", 827 | "print (\"Total elapsed time: \")\n", 828 | "print (end-start)\n", 829 | "#input(\"Hit any key to end\")" 830 | ] 831 | } 832 | ], 833 | "metadata": { 834 | "kernelspec": { 835 | "display_name": "Python 3", 836 | "language": "python", 837 | "name": "python3" 838 | }, 839 | "language_info": { 840 | "codemirror_mode": { 841 | "name": "ipython", 842 | "version": 3 843 | }, 844 | "file_extension": ".py", 845 | "mimetype": "text/x-python", 846 | "name": "python", 847 | "nbconvert_exporter": "python", 848 | "pygments_lexer": "ipython3", 849 | "version": "3.6.9" 850 | } 851 | }, 852 | "nbformat": 4, 853 | "nbformat_minor": 2 854 | } 855 | -------------------------------------------------------------------------------- /jupyter-samples/campaign-integratedml-jdbc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train an IntegratedML model on Marketing Campaign Dataset\n", 8 | "## Use JDBC to connect to InterSystems IRIS database\n", 9 | "This Notebook demonstrates:\n", 10 | "- Using the JayDeBeApi Python library to connect to InterSystems IRIS\n", 11 | "- Creating views to segment data into training and test sets\n", 12 | "- Defining and training an IntegratedML model to predict marketing campaign responses\n", 13 | "- Comparing the resulting model's predictions to data in the test set (that the model was not trained on)\n", 14 | "- Using the IntegratedML \"VALIDATE MODEL\" command to calculate accuracy metrics on the test set data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "" 26 | ], 27 | "text/plain": [ 28 | "" 29 | ] 30 | 
}, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | } 34 | ], 35 | "source": [ 36 | "from IPython.core.display import display, HTML\n", 37 | "display(HTML(\"\"))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### 1. Set environment variables, if necessary" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### 2. Get a JDBC connection and cursor" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "\n", 61 | "import jaydebeapi\n", 62 | "url = \"jdbc:IRIS://irisimlsvr:1972/USER\"\n", 63 | "driver = \"com.intersystems.jdbc.IRISDriver\"\n", 64 | "user = \"SUPERUSER\"\n", 65 | "password = \"SYS\"\n", 66 | "#libx = \"C:/InterSystems/IRIS20194/dev/java/lib/JDK18\"\n", 67 | "#jarfile = \"C:/InterSystems/IRIS20194/dev/java/lib/JDK18/intersystems-jdbc-3.0.0.jar\"\n", 68 | "jarfile = \"./intersystems-jdbc-3.1.0.jar\"" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "conn = jaydebeapi.connect(driver, url, [user, password], jarfile)\n", 78 | "curs = conn.cursor()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 3. Specify the source data table" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "dataTable = 'Marketing.Campaign'\n", 95 | "trainingTable = \"Marketing.CampaignTrainingSmall\"\n", 96 | "predictTable = \"Marketing.CampaignPredictSmall\"" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### 4. Execute a query and display results in a Pandas DataFrame" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/html": [ 114 |
\n", 115 | "\n", 128 | "\n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 
400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | "
AGEEDUCATIONMARITAL_STATUSOCCUPATIONRELATIONSHIPRACEGENDERPROSPECT_IDEMPLOYMENTYEARS_EDUCCOUNTRY_OF_BIRTHINV_EARNINGSINV_LOSSESWORKWEEK_LENGTHRESPONSEAMOUNT
050HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001107BUSINESS9USA00400123.90
149HIGHSCHOOLMARRIEDCIVSPOLOGISTICSTRANSPHUSBANDWHITEMALE77001140BUSINESS9USA0040097.91
237HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001143BUSINESS9USA00400135.65
343HIGHSCHOOLMARRIEDCIVSPOCLEANINGHANDLINGHUSBANDWHITEMALE77001187BUSINESS9USA0040093.23
454HIGHSCHOOLMARRIEDCIVSPOLOGISTICSTRANSPHUSBANDWHITEMALE77001412BUSINESS9USA00400143.72
527HIGHSCHOOLMARRIEDCIVSPOSERVICEVARIOUSHUSBANDWHITEMALE77001429BUSINESS9USA00400232.20
629HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001443BUSINESS9USA00400103.46
729HIGHSCHOOLMARRIEDCIVSPOADMINOFFICEHUSBANDWHITEMALE77001484BUSINESS9USA00400123.53
821HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001526BUSINESS9USA00400230.07
936HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001634BUSINESS9USA0040048.95
1052HIGHSCHOOLMARRIEDCIVSPOSPECIALISTPROFFHUSBANDWHITEMALE77001649BUSINESS9USA0040058.78
1160HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001660BUSINESS9USA00400106.29
1228HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001661BUSINESS9USA00400211.26
1361HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001719BUSINESS9USA00400104.34
1434HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001794BUSINESS9USA00400184.73
1562HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001874BUSINESS9USA0040066.32
1656HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001913BUSINESS9USA0040060.13
1734HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001942BUSINESS9USA0040077.02
1829HIGHSCHOOLMARRIEDCIVSPOSUPPORTTECHNICHUSBANDWHITEMALE77001977BUSINESS9USA00400122.43
1933HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77002003BUSINESS9USA00400257.23
\n", 533 | "
" 534 | ], 535 | "text/plain": [ 536 | " AGE EDUCATION MARITAL_STATUS OCCUPATION RELATIONSHIP RACE \\\n", 537 | "0 50 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 538 | "1 49 HIGHSCHOOL MARRIEDCIVSPO LOGISTICSTRANSP HUSBAND WHITE \n", 539 | "2 37 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 540 | "3 43 HIGHSCHOOL MARRIEDCIVSPO CLEANINGHANDLING HUSBAND WHITE \n", 541 | "4 54 HIGHSCHOOL MARRIEDCIVSPO LOGISTICSTRANSP HUSBAND WHITE \n", 542 | "5 27 HIGHSCHOOL MARRIEDCIVSPO SERVICEVARIOUS HUSBAND WHITE \n", 543 | "6 29 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 544 | "7 29 HIGHSCHOOL MARRIEDCIVSPO ADMINOFFICE HUSBAND WHITE \n", 545 | "8 21 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 546 | "9 36 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 547 | "10 52 HIGHSCHOOL MARRIEDCIVSPO SPECIALISTPROFF HUSBAND WHITE \n", 548 | "11 60 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 549 | "12 28 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 550 | "13 61 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 551 | "14 34 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 552 | "15 62 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 553 | "16 56 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 554 | "17 34 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 555 | "18 29 HIGHSCHOOL MARRIEDCIVSPO SUPPORTTECHNIC HUSBAND WHITE \n", 556 | "19 33 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 557 | "\n", 558 | " GENDER PROSPECT_ID EMPLOYMENT YEARS_EDUC COUNTRY_OF_BIRTH INV_EARNINGS \\\n", 559 | "0 MALE 77001107 BUSINESS 9 USA 0 \n", 560 | "1 MALE 77001140 BUSINESS 9 USA 0 \n", 561 | "2 MALE 77001143 BUSINESS 9 USA 0 \n", 562 | "3 MALE 77001187 BUSINESS 9 USA 0 \n", 563 | "4 MALE 77001412 BUSINESS 9 USA 0 \n", 564 | "5 MALE 77001429 BUSINESS 9 USA 0 \n", 565 | "6 MALE 77001443 BUSINESS 9 USA 0 \n", 566 | "7 MALE 77001484 BUSINESS 9 USA 0 \n", 567 | "8 MALE 77001526 BUSINESS 9 USA 0 \n", 568 | "9 MALE 77001634 BUSINESS 9 USA 0 \n", 569 | "10 MALE 77001649 BUSINESS 9 USA 0 \n", 570 | "11 MALE 77001660 BUSINESS 9 USA 0 \n", 571 | "12 MALE 77001661 BUSINESS 9 USA 0 \n", 572 | "13 MALE 77001719 BUSINESS 9 USA 0 \n", 573 | "14 MALE 77001794 BUSINESS 9 USA 0 \n", 574 | "15 MALE 77001874 BUSINESS 9 USA 0 \n", 575 | "16 MALE 77001913 BUSINESS 9 USA 0 \n", 576 | "17 MALE 77001942 BUSINESS 9 USA 0 \n", 577 | "18 MALE 77001977 BUSINESS 9 USA 0 \n", 578 | "19 MALE 77002003 BUSINESS 9 USA 0 \n", 579 | "\n", 580 | " INV_LOSSES WORKWEEK_LENGTH RESPONSE AMOUNT \n", 581 | "0 0 40 0 123.90 \n", 582 | "1 0 40 0 97.91 \n", 583 | "2 0 40 0 135.65 \n", 584 | "3 0 40 0 93.23 \n", 585 | "4 0 40 0 143.72 \n", 586 | "5 0 40 0 232.20 \n", 587 | "6 0 40 0 103.46 \n", 588 | "7 0 40 0 123.53 \n", 589 | "8 0 40 0 230.07 \n", 590 | "9 0 40 0 48.95 \n", 591 | "10 0 40 0 58.78 \n", 592 | "11 0 40 0 106.29 \n", 593 | "12 0 40 0 211.26 \n", 594 | "13 0 40 0 104.34 \n", 595 | "14 0 40 0 184.73 \n", 596 | "15 0 40 0 66.32 \n", 597 | "16 0 40 0 60.13 \n", 598 | "17 0 40 0 77.02 \n", 599 | "18 0 40 0 122.43 \n", 600 | "19 0 40 0 257.23 " 601 | ] 602 | }, 603 | "metadata": {}, 604 | "output_type": "display_data" 605 | } 606 | ], 607 | "source": [ 608 | "import pandas as pd\n", 609 | "from IPython.display import display\n", 610 | "\n", 611 | "df = pd.read_sql(\"select TOP 20 * from %s\" % dataTable, conn)\n", 612 | "display(df)" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": 6, 618 | 
"metadata": {}, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/html": [ 623 | "
\n", 624 | "\n", 637 | "\n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | "
Aggregate_1
048842
\n", 651 | "
" 652 | ], 653 | "text/plain": [ 654 | " Aggregate_1\n", 655 | "0 48842" 656 | ] 657 | }, 658 | "metadata": {}, 659 | "output_type": "display_data" 660 | } 661 | ], 662 | "source": [ 663 | "# Show number rows\n", 664 | "df1 = pd.read_sql(\"SELECT COUNT(*) FROM %s\" % dataTable, conn)\n", 665 | "display(df1)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "metadata": {}, 671 | "source": [ 672 | "### Cleaning before retrying" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 7, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "# Before executing the notebook again, drop model, tables, views,... previously created\n", 682 | "#curs.execute(\"DROP VIEW %s\" % trainingTable)\n", 683 | "#curs.execute(\"DROP VIEW %s\" % predictTable)\n", 684 | "#curs.execute(\"DROP MODEL CampaignModel\")" 685 | ] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": {}, 690 | "source": [ 691 | "### 5. Make some views to split training and testing datasets" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 8, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "# Small training set view\n", 701 | "curs.execute(\"CREATE VIEW %s AS SELECT * FROM %s WHERE ID<3001\" % (trainingTable,dataTable))\n", 702 | "# Small prediction set\n", 703 | "curs.execute(\"CREATE VIEW %s AS SELECT * FROM %s WHERE ID>47000\" % (predictTable,dataTable))" 704 | ] 705 | }, 706 | { 707 | "cell_type": "markdown", 708 | "metadata": {}, 709 | "source": [ 710 | "### 6. Create and Train an IntegratedML Model using default settings\n", 711 | "IntegratedML only needs a model name, the name of the column that is the target column to predict, and a table (or SELECT query to specify input columns." 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 9, 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "curs.execute(\"CREATE MODEL CampaignModel PREDICTING (RESPONSE) FROM %s\" % trainingTable)" 721 | ] 722 | }, 723 | { 724 | "cell_type": "markdown", 725 | "metadata": {}, 726 | "source": [ 727 | "Now that the model is defined, you can TRAIN it, which invokes the AutoML machine learning procedure." 728 | ] 729 | }, 730 | { 731 | "cell_type": "code", 732 | "execution_count": 10, 733 | "metadata": {}, 734 | "outputs": [], 735 | "source": [ 736 | "curs.execute(\"TRAIN MODEL CampaignModel\")" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": {}, 742 | "source": [ 743 | "Once that finishes, you can see some information about the model in the \"ML_TRAINED_MODELS\" table." 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": 11, 749 | "metadata": {}, 750 | "outputs": [ 751 | { 752 | "data": { 753 | "text/html": [ 754 | "
\n", 755 | "\n", 768 | "\n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | "
MODEL_NAMETRAINED_MODEL_NAMEPROVIDERTRAINED_TIMESTAMPMODEL_TYPEMODEL_INFO
0CampaignModelCampaignModel_t1AutoML2022-04-22 08:01:09.859000classificationModelType:TensorFlow Neural Network, Package:T...
1bcbc_t1AutoML2022-04-22 07:54:38.592000classificationModelType:TensorFlow Neural Network, Package:T...
\n", 801 | "
" 802 | ], 803 | "text/plain": [ 804 | " MODEL_NAME TRAINED_MODEL_NAME PROVIDER TRAINED_TIMESTAMP \\\n", 805 | "0 CampaignModel CampaignModel_t1 AutoML 2022-04-22 08:01:09.859000 \n", 806 | "1 bc bc_t1 AutoML 2022-04-22 07:54:38.592000 \n", 807 | "\n", 808 | " MODEL_TYPE MODEL_INFO \n", 809 | "0 classification ModelType:TensorFlow Neural Network, Package:T... \n", 810 | "1 classification ModelType:TensorFlow Neural Network, Package:T... " 811 | ] 812 | }, 813 | "metadata": {}, 814 | "output_type": "display_data" 815 | } 816 | ], 817 | "source": [ 818 | "df3 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_TRAINED_MODELS\", conn)\n", 819 | "display(df3)" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "### 7. Compare model output to data it has not seen yet\n", 827 | "Now you can use SQL to SELECT data from another table, run the IntegratedML model on this new data, and see how well the predictions match the data!" 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": 12, 833 | "metadata": {}, 834 | "outputs": [ 835 | { 836 | "data": { 837 | "text/html": [ 838 | "
\n", 839 | "\n", 852 | "\n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | "
PredictedResponseActualResponse
000
100
201
300
401
.........
183700
183800
183900
184000
184100
\n", 918 | "

1842 rows × 2 columns

\n", 919 | "
" 920 | ], 921 | "text/plain": [ 922 | " PredictedResponse ActualResponse\n", 923 | "0 0 0\n", 924 | "1 0 0\n", 925 | "2 0 1\n", 926 | "3 0 0\n", 927 | "4 0 1\n", 928 | "... ... ...\n", 929 | "1837 0 0\n", 930 | "1838 0 0\n", 931 | "1839 0 0\n", 932 | "1840 0 0\n", 933 | "1841 0 0\n", 934 | "\n", 935 | "[1842 rows x 2 columns]" 936 | ] 937 | }, 938 | "metadata": {}, 939 | "output_type": "display_data" 940 | } 941 | ], 942 | "source": [ 943 | "df4 = pd.read_sql(\"SELECT PREDICT(CampaignModel) AS PredictedResponse, \\\n", 944 | " response AS ActualResponse FROM %s\" % predictTable, conn)\n", 945 | "display(df4)" 946 | ] 947 | }, 948 | { 949 | "cell_type": "markdown", 950 | "metadata": {}, 951 | "source": [ 952 | "### 8. VALIDATE MODEL command calculates accuracy metrics\n", 953 | "You can certainly take that output above and calculate the accuracy using a standard formula, but IntegratedML has a built-in function to do that!\n", 954 | "\n", 955 | "Each time you run the command \"VALIDATE MODEL...\" it generates a set of metrics calculated on the data passed into the query. Since this table can be a bit difficult to read in its raw form we use a simple \"pivot\" call to arrange the data." 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": 13, 961 | "metadata": {}, 962 | "outputs": [ 963 | { 964 | "data": { 965 | "text/html": [ 966 | "
\n", 967 | "\n", 980 | "\n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | "
METRIC_NAMEAccuracyF-MeasurePrecisionRecall
VALIDATION_RUN_NAME
CampaignModel_t1_v10.60.20.810.11
\n", 1007 | "
" 1008 | ], 1009 | "text/plain": [ 1010 | "METRIC_NAME Accuracy F-Measure Precision Recall\n", 1011 | "VALIDATION_RUN_NAME \n", 1012 | "CampaignModel_t1_v1 0.6 0.2 0.81 0.11" 1013 | ] 1014 | }, 1015 | "metadata": {}, 1016 | "output_type": "display_data" 1017 | } 1018 | ], 1019 | "source": [ 1020 | "curs.execute(\"VALIDATE MODEL CampaignModel FROM %s\" % predictTable)\n", 1021 | "df5 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS\", conn)\n", 1022 | "df6 = df5.pivot(index='VALIDATION_RUN_NAME', columns='METRIC_NAME', values='METRIC_VALUE')\n", 1023 | "display(df6)" 1024 | ] 1025 | } 1026 | ], 1027 | "metadata": { 1028 | "kernelspec": { 1029 | "display_name": "Python 3", 1030 | "language": "python", 1031 | "name": "python3" 1032 | }, 1033 | "language_info": { 1034 | "codemirror_mode": { 1035 | "name": "ipython", 1036 | "version": 3 1037 | }, 1038 | "file_extension": ".py", 1039 | "mimetype": "text/x-python", 1040 | "name": "python", 1041 | "nbconvert_exporter": "python", 1042 | "pygments_lexer": "ipython3", 1043 | "version": "3.6.9" 1044 | } 1045 | }, 1046 | "nbformat": 4, 1047 | "nbformat_minor": 2 1048 | } 1049 | -------------------------------------------------------------------------------- /jupyter-samples/intersystems-jdbc-3.1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/jupyter-samples/intersystems-jdbc-3.1.0.jar -------------------------------------------------------------------------------- /jupyter-samples/libirisodbcu35.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/jupyter-samples/libirisodbcu35.so -------------------------------------------------------------------------------- /jupyter-samples/odbc.ini: -------------------------------------------------------------------------------- 1 | [user] 2 | Driver=InterSystems ODBC35 3 | Protocol=TCP 4 | Host=irisimlsvr 5 | Port=51773 6 | Namespace=USER 7 | UID=SUPERUSER 8 | Password=SYS 9 | Description=Sample namespace 10 | Query Timeout=0 11 | Static Cursors=0 12 | 13 | -------------------------------------------------------------------------------- /jupyter-samples/odbcinst.ini: -------------------------------------------------------------------------------- 1 | [InterSystems ODBC35] 2 | UsageCount=1 3 | Driver=/tf/libirisodbcu35.so 4 | Setup=/tf/libirisodbcu35.so 5 | SQLLevel=1 6 | FileUsage=0 7 | DriverODBCVer=02.10 8 | ConnectFunctions=YYN 9 | APILevel=1 10 | DEBUG=1 11 | CPTimeout= 12 | 13 | -------------------------------------------------------------------------------- /tf2-jupyter-jdbc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.2.0-jupyter 2 | LABEL maintainer="Zhong Li " 3 | 4 | RUN apt-get update && apt-get install -y default-jre-headless && apt-get clean && rm -rf /var/lib/apt/lists/* 5 | 6 | COPY requirements.txt ./ 7 | 8 | RUN pip install --upgrade pip && \ 9 | pip install --no-cache-dir -r requirements.txt 10 | 11 | CMD /bin/bash -c "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root --NotebookApp.token=''" -------------------------------------------------------------------------------- /tf2-jupyter-jdbc/requirements.txt: 
-------------------------------------------------------------------------------- 1 | jupyterthemes 2 | JayDeBeApi 3 | pandas 4 | scikit-learn 5 | 6 | --------------------------------------------------------------------------------
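Taken together, the notebooks in jupyter-samples all follow the same four-statement IntegratedML lifecycle: CREATE MODEL, TRAIN MODEL, VALIDATE MODEL, and PREDICT()/PROBABILITY() in ordinary SQL. A condensed sketch of that flow over JDBC, assuming the irisimlsvr container, credentials, and driver jar used throughout this repository plus the Marketing.Campaign* views created in the campaign notebook; the model name DemoModel is hypothetical:

```python
import jaydebeapi
import pandas as pd

# Connection details match the docker-compose defaults used throughout this repo.
conn = jaydebeapi.connect(
    "com.intersystems.jdbc.IRISDriver",
    "jdbc:IRIS://irisimlsvr:1972/USER",
    ["SUPERUSER", "SYS"],
    "./intersystems-jdbc-3.1.0.jar")
curs = conn.cursor()

# 1. Define: name the model and the target column; inputs come from the FROM clause.
curs.execute("CREATE MODEL DemoModel PREDICTING (RESPONSE) FROM Marketing.CampaignTrainingSmall")
# 2. Train: kicks off the configured provider (AutoML by default).
curs.execute("TRAIN MODEL DemoModel")
# 3. Validate: writes metrics to INFORMATION_SCHEMA.ML_VALIDATION_METRICS.
curs.execute("VALIDATE MODEL DemoModel FROM Marketing.CampaignPredictSmall")
metrics = pd.read_sql("SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS", conn)
# 4. Predict: PREDICT() behaves like any other SQL function.
preds = pd.read_sql(
    "SELECT PREDICT(DemoModel) AS PredictedResponse, RESPONSE AS ActualResponse "
    "FROM Marketing.CampaignPredictSmall", conn)
conn.close()
```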